diff --git a/.asf.yaml b/.asf.yaml index 08837a974e5..00662352a5e 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -39,6 +39,12 @@ github: rebase: true squash: true + collaborators: + - jbonofre + +publish: + whoami: asf-site + notifications: commits: commits@avro.apache.org issues: issues@avro.apache.org diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000000..c2ac39dddf1 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,33 @@ +{ + "name": "Avro Development", + "build": { + "dockerfile": "../share/docker/Dockerfile", + "context": "..", + "options": [ + "--build-arg=BUILDPLATFORM=linux/amd64" // Dockerfile needs a build platfrom argument + ] + }, + "customizations": { + "vscode": { + "settings": { + }, + "extensions": [ + // Python + "ms-python.python", + "ms-python.vscode-pylance", + // C/C++ + "ms-vscode.cpptools", + // C# + "ms-dotnettools.csharp", + // Java + "vscjava.vscode-java-pack", + // Shell script + "timonwong.shellcheck", + // YAML + "redhat.vscode-yaml", + // Git + "eamodio.gitlens" + ] + } + } +} diff --git a/.editorconfig b/.editorconfig index b2d8a7c5fc9..b96e2b9c6e8 100644 --- a/.editorconfig +++ b/.editorconfig @@ -19,13 +19,30 @@ root = true charset = utf-8 end_of_line = lf insert_final_newline = true +ij_any_block_comment_at_first_column = false +ij_any_line_comment_at_first_column = false [*.{java,xml,sh}] indent_style = space indent_size = 2 trim_trailing_whitespace=true -[*.{cs,ps1}] +ij_continuation_indent_size = 4 +ij_java_wrap_comments = true +ij_any_indent_case_from_switch = false + +[*.{avsc,avpr,avdl}] +indent_style = space +indent_size = 2 +trim_trailing_whitespace=true + +ij_continuation_indent_size = 4 +ij_json_space_after_colon = true +ij_json_space_before_colon = true +ij_json_spaces_within_brackets = true +ij_any_array_initializer_wrap = off + +[*.{ps1}] indent_style = space indent_size = 4 trim_trailing_whitespace=true @@ -37,3 +54,174 @@ trim_trailing_whitespace=true [*.py] indent_style = space indent_size = 4 + +# Generated code +[*{_AssemblyInfo.cs,.notsupported.cs,AsmOffsets.cs}] +generated_code = true + +# C# files +[*.cs] +indent_style = space +indent_size = 4 +trim_trailing_whitespace=true + +# New line preferences +csharp_new_line_before_open_brace = all +csharp_new_line_before_else = true +csharp_new_line_before_catch = true +csharp_new_line_before_finally = true +csharp_new_line_before_members_in_object_initializers = true +csharp_new_line_before_members_in_anonymous_types = true +csharp_new_line_between_query_expression_clauses = true + +# Indentation preferences +csharp_indent_block_contents = true +csharp_indent_braces = false +csharp_indent_case_contents = true +csharp_indent_case_contents_when_block = true +csharp_indent_switch_labels = true +csharp_indent_labels = one_less_than_current + +# Modifier preferences +csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion + +# avoid this. unless absolutely necessary +dotnet_style_qualification_for_field = false:suggestion +dotnet_style_qualification_for_property = false:suggestion +dotnet_style_qualification_for_method = false:suggestion +dotnet_style_qualification_for_event = false:suggestion + +# Types: use keywords instead of BCL types, and permit var only when the type is clear +csharp_style_var_for_built_in_types = false:suggestion +csharp_style_var_when_type_is_apparent = false:none +csharp_style_var_elsewhere = false:suggestion +dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion +dotnet_style_predefined_type_for_member_access = true:suggestion + +# Non-private static fields are PascalCase +dotnet_naming_rule.non_private_static_fields_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.non_private_static_fields_should_be_pascal_case.symbols = non_private_static_fields +dotnet_naming_rule.non_private_static_fields_should_be_pascal_case.style = non_private_static_field_style +dotnet_naming_symbols.non_private_static_fields.applicable_kinds = field +dotnet_naming_symbols.non_private_static_fields.applicable_accessibilities = public, protected, internal, protected_internal, private_protected +dotnet_naming_symbols.non_private_static_fields.required_modifiers = static +dotnet_naming_style.non_private_static_field_style.capitalization = pascal_case + +# Constants are PascalCase +dotnet_naming_rule.constants_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.constants_should_be_pascal_case.symbols = constants +dotnet_naming_rule.constants_should_be_pascal_case.style = constant_style +dotnet_naming_symbols.constants.applicable_kinds = field, local +dotnet_naming_symbols.constants.required_modifiers = const +dotnet_naming_style.constant_style.capitalization = pascal_case + +# Static fields are camelCase and start with s_ +dotnet_naming_rule.static_fields_should_be_camel_case.severity = suggestion +dotnet_naming_rule.static_fields_should_be_camel_case.symbols = static_fields +dotnet_naming_rule.static_fields_should_be_camel_case.style = static_field_style +dotnet_naming_symbols.static_fields.applicable_kinds = field +dotnet_naming_symbols.static_fields.required_modifiers = static +dotnet_naming_style.static_field_style.capitalization = camel_case +dotnet_naming_style.static_field_style.required_prefix = s_ + +# Instance fields are camelCase and start with _ +dotnet_naming_rule.instance_fields_should_be_camel_case.severity = suggestion +dotnet_naming_rule.instance_fields_should_be_camel_case.symbols = instance_fields +dotnet_naming_rule.instance_fields_should_be_camel_case.style = instance_field_style +dotnet_naming_symbols.instance_fields.applicable_kinds = field +dotnet_naming_style.instance_field_style.capitalization = camel_case +dotnet_naming_style.instance_field_style.required_prefix = _ + +# Locals and parameters are camelCase +dotnet_naming_rule.locals_should_be_camel_case.severity = suggestion +dotnet_naming_rule.locals_should_be_camel_case.symbols = locals_and_parameters +dotnet_naming_rule.locals_should_be_camel_case.style = camel_case_style +dotnet_naming_symbols.locals_and_parameters.applicable_kinds = parameter, local +dotnet_naming_style.camel_case_style.capitalization = camel_case + +# Local functions are PascalCase +dotnet_naming_rule.local_functions_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.local_functions_should_be_pascal_case.symbols = local_functions +dotnet_naming_rule.local_functions_should_be_pascal_case.style = local_function_style +dotnet_naming_symbols.local_functions.applicable_kinds = local_function +dotnet_naming_style.local_function_style.capitalization = pascal_case + +# By default, name items with PascalCase +dotnet_naming_rule.members_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.members_should_be_pascal_case.symbols = all_members +dotnet_naming_rule.members_should_be_pascal_case.style = pascal_case_style +dotnet_naming_symbols.all_members.applicable_kinds = * +dotnet_naming_style.pascal_case_style.capitalization = pascal_case + +# Code style defaults +csharp_using_directive_placement = outside_namespace:suggestion +dotnet_sort_system_directives_first = true +csharp_prefer_braces = true:silent +csharp_preserve_single_line_blocks = true:none +csharp_preserve_single_line_statements = false:none +csharp_prefer_static_local_function = true:suggestion +csharp_prefer_simple_using_statement = false:none +csharp_style_prefer_switch_expression = false:none +dotnet_style_readonly_field = true:suggestion + +# Expression-level preferences +dotnet_style_object_initializer = true:suggestion +dotnet_style_collection_initializer = true:suggestion +dotnet_style_explicit_tuple_names = true:suggestion +dotnet_style_coalesce_expression = true:suggestion +dotnet_style_null_propagation = true:suggestion +dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion +dotnet_style_prefer_inferred_tuple_names = true:suggestion +dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion +dotnet_style_prefer_auto_properties = true:suggestion +dotnet_style_prefer_conditional_expression_over_assignment = true:silent +dotnet_style_prefer_conditional_expression_over_return = true:silent +csharp_prefer_simple_default_expression = true:suggestion + +# Expression-bodied members +csharp_style_expression_bodied_methods = true:silent +csharp_style_expression_bodied_constructors = true:silent +csharp_style_expression_bodied_operators = true:silent +csharp_style_expression_bodied_properties = true:silent +csharp_style_expression_bodied_indexers = true:silent +csharp_style_expression_bodied_accessors = true:silent +csharp_style_expression_bodied_lambdas = true:silent +csharp_style_expression_bodied_local_functions = true:silent + +# Pattern matching +csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion +csharp_style_pattern_matching_over_as_with_null_check = true:suggestion +csharp_style_inlined_variable_declaration = true:suggestion + +# Null checking preferences +csharp_style_throw_expression = true:suggestion +csharp_style_conditional_delegate_call = true:suggestion + +# Other features +csharp_style_prefer_index_operator = false:none +csharp_style_prefer_range_operator = false:none +csharp_style_pattern_local_over_anonymous_function = false:none + +# Space preferences +csharp_space_after_cast = false +csharp_space_after_colon_in_inheritance_clause = true +csharp_space_after_comma = true +csharp_space_after_dot = false +csharp_space_after_keywords_in_control_flow_statements = true +csharp_space_after_semicolon_in_for_statement = true +csharp_space_around_binary_operators = before_and_after +csharp_space_around_declaration_statements = false +csharp_space_before_colon_in_inheritance_clause = true +csharp_space_before_comma = false +csharp_space_before_dot = false +csharp_space_before_open_square_brackets = false +csharp_space_before_semicolon_in_for_statement = false +csharp_space_between_empty_square_brackets = false +csharp_space_between_method_call_empty_parameter_list_parentheses = false +csharp_space_between_method_call_name_and_opening_parenthesis = false +csharp_space_between_method_call_parameter_list_parentheses = false +csharp_space_between_method_declaration_empty_parameter_list_parentheses = false +csharp_space_between_method_declaration_name_and_open_parenthesis = false +csharp_space_between_method_declaration_parameter_list_parentheses = false +csharp_space_between_parentheses = false +csharp_space_between_square_brackets = false diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..b12292b62e4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,11 @@ +# Set default behavior to automatically normalize line endings. +* text=auto + +# Force bash scripts to always use lf line endings so that if a repo is accessed +# in Unix via a file share from Windows, the scripts will work. +*.sh text eol=lf + +# Force batch scripts to always use crlf line endings so that if a repo is accessed +# in Unix via a file share from Windows, the scripts will work. +*.cmd text eol=crlf +*.bat text eol=crlf diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f8efdbd7f8e..2823e406003 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,26 +1,60 @@ -Make sure you have checked _all_ steps below. + + +## What is the purpose of the change + +*(For example: This pull request improves file read performance by buffering data, fixing AVRO-XXXX.)* + + +## Verifying this change + +*(Please pick one of the following options)* + +This change is a trivial rework / code cleanup without any test coverage. + +*(or)* + +This change is already covered by existing tests, such as *(please describe tests)*. + +*(or)* + +This change added tests and can be verified as follows: + +*(example:)* +- *Extended interop tests to verify consistent valid schema names between SDKs* +- *Added test that validates that Java throws an AvroRuntimeException on invalid binary data* +- *Manually verified the change by building the website and checking the new redirect* + + +## Documentation + +- Does this pull request introduce a new feature? (yes / no) +- If yes, how is the feature documented? (not applicable / docs / JavaDocs / not documented) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 439a4dfcb33..b11db2b271d 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -51,6 +51,13 @@ updates: day: "sunday" open-pull-requests-limit: 20 + - package-ecosystem: "npm" + directory: "/doc" + schedule: + interval: "weekly" + day: "sunday" + open-pull-requests-limit: 10 + - package-ecosystem: "pip" directory: "/lang/py/" schedule: @@ -65,10 +72,15 @@ updates: day: "sunday" open-pull-requests-limit: 20 - - package-ecosystem: "cargo" - directory: "/lang/rust/" + - package-ecosystem: "bundler" + directory: "/doc/" schedule: interval: "weekly" day: "sunday" open-pull-requests-limit: 20 + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + day: "sunday" diff --git a/.github/labeler.yml b/.github/labeler.yml index ae59a356e43..1e4e98e7831 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -18,15 +18,49 @@ # # Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler -C: ["lang/c/**/*"] -C++: ["lang/c++/**/*"] -C#: ["lang/csharp/**/*"] -Java: ["lang/java/**/*"] -Js: ["lang/js/**/*"] -Perl: ["lang/perl/**/*"] -Php: ["lang/php/**/*"] -Python: ["lang/py/**/*"] -Ruby: ["lang/ruby/**/*"] -Rust: ["lang/rust/**/*"] -build: ["**/*Dockerfile*", "**/*.sh", "**/*pom.xml", ".github/**/*"] -website: ["doc/**/*"] +C: + - changed-files: + - any-glob-to-any-file: "lang/c/**/*" + - any-glob-to-any-file: ".github/workflows/test-lang-c.yml" +C++: + - changed-files: + - any-glob-to-any-file: "lang/c++/**/*" + - any-glob-to-any-file: ".github/workflows/test-lang-c++.yml" +C#: + - changed-files: + - any-glob-to-any-file: "lang/csharp/**/*" + - any-glob-to-any-file: ".github/workflows/*-csharp*.yml" +Java: + - changed-files: + - any-glob-to-any-file: "lang/java/**/*" + - any-glob-to-any-file: ".github/workflows/*-java*.yml" + - any-glob-to-any-file: ".github/workflows/*maven*.yml" + - any-glob-to-any-file: ".github/workflows/*spotless*.yml" +Js: + - changed-files: + - any-glob-to-any-file: "lang/js/**/*" + - any-glob-to-any-file: ".github/workflows/*-js*.yml" +Perl: + - changed-files: + - any-glob-to-any-file: "lang/perl/**/*" + - any-glob-to-any-file: ".github/workflows/test-lang-perl.yml" +Php: + - changed-files: + - any-glob-to-any-file: "lang/php/**/*" + - any-glob-to-any-file: ".github/workflows/test-lang-php.yml" +Python: + - changed-files: + - any-glob-to-any-file: "lang/py/**/*" + - any-glob-to-any-file: ".github/workflows/*-py*.yml" +Ruby: + - changed-files: + - any-glob-to-any-file: "lang/ruby/**/*" + - any-glob-to-any-file: ".github/workflows/test-lang-ruby.yml" +build: + - changed-files: + - any-glob-to-any-file: ["**/*Dockerfile*", "**/*.sh", "**/*pom.xml", ".github/**/*"] + - any-glob-to-any-file: ".github/workflows/test-docker.yml" +website: + - changed-files: + - any-glob-to-any-file: "doc/**/*" + - any-glob-to-any-file: ".github/workflows/deploy-docs.yml" diff --git a/.github/workflows/codeql-csharp-analysis.yml b/.github/workflows/codeql-csharp-analysis.yml index 3cbb0fdd245..fae5659a9c6 100644 --- a/.github/workflows/codeql-csharp-analysis.yml +++ b/.github/workflows/codeql-csharp-analysis.yml @@ -23,15 +23,19 @@ name: "CodeQL C#" on: push: branches: - - master + - main pull_request: # The branches below must be a subset of the branches above branches: - - master + - main paths: - .github/workflows/codeql-csharp-analysis.yml - lang/csharp/** +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: analyze: name: Analyze @@ -49,15 +53,24 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v6 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. fetch-depth: 2 + # Install .NET SDKs + - name: Install .NET SDKs + uses: actions/setup-dotnet@v5 + with: + dotnet-version: | + 6.0.x + 7.0.x + 8.0.x + # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -69,7 +82,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v1 + uses: github/codeql-action/autobuild@v4 # â„šī¸ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -79,4 +92,4 @@ jobs: # uses a compiled language - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/codeql-java-analysis.yml b/.github/workflows/codeql-java-analysis.yml index 1b4933fcf60..b7668b497fd 100644 --- a/.github/workflows/codeql-java-analysis.yml +++ b/.github/workflows/codeql-java-analysis.yml @@ -23,15 +23,19 @@ on: workflow_dispatch: push: branches: - - master + - main pull_request: branches: - - master + - main paths: - .github/workflows/codeql-java-analysis.yml - lang/java/** - pom.xml +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: analyze: name: Analyze @@ -49,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v6 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. @@ -57,7 +61,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -66,10 +70,25 @@ jobs: # queries: ./path/to/local/query, your-org/your-repo/queries@main queries: +security-and-quality + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.11' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.11 + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 +# - name: Autobuild +# uses: github/codeql-action/autobuild@v3 # â„šī¸ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -77,6 +96,8 @@ jobs: # âœī¸ If the Autobuild fails above, remove it and uncomment the following three lines # and modify them (or add more) to build your code if your project # uses a compiled language + - name: 'Java Test' + run: mvn clean test - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/codeql-js-analysis.yml b/.github/workflows/codeql-js-analysis.yml index 58d2a0a6296..453b926b165 100644 --- a/.github/workflows/codeql-js-analysis.yml +++ b/.github/workflows/codeql-js-analysis.yml @@ -23,15 +23,19 @@ name: "CodeQL JavaScript" on: push: branches: - - master + - main pull_request: # The branches below must be a subset of the branches above branches: - - master + - main paths: - .github/workflows/codeql-js-analysis.yml - lang/js/** +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: analyze: name: Analyze @@ -49,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v6 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. @@ -57,7 +61,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -69,7 +73,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v1 + uses: github/codeql-action/autobuild@v4 # â„šī¸ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -79,4 +83,4 @@ jobs: # uses a compiled language - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/codeql-py-analysis.yml b/.github/workflows/codeql-py-analysis.yml index 048b2ed1a9b..ea7567656ab 100644 --- a/.github/workflows/codeql-py-analysis.yml +++ b/.github/workflows/codeql-py-analysis.yml @@ -23,15 +23,19 @@ name: "CodeQL Python" on: push: branches: - - master + - main pull_request: # The branches below must be a subset of the branches above branches: - - master + - main paths: - .github/workflows/codeql-py-analysis.yml - lang/py/** +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: analyze: name: Analyze @@ -49,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v6 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. @@ -57,7 +61,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -69,7 +73,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v1 + uses: github/codeql-action/autobuild@v4 # â„šī¸ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -79,4 +83,4 @@ jobs: # uses a compiled language - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml new file mode 100644 index 00000000000..f456026f505 --- /dev/null +++ b/.github/workflows/deploy-docs.yml @@ -0,0 +1,322 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. + + +# A Github Actions workflow that builds and copies the website to asf-site branch +name: Deploy website + +on: + # Runs on pushes targeting the default branch + push: + branches: + - main + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build-website: + name: Build website + runs-on: ubuntu-latest + env: + HUGO_VERSION: 0.132.1 + steps: + - name: Install Hugo CLI + run: | + wget -q -O ${{ runner.temp }}/hugo.deb https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/hugo_extended_${HUGO_VERSION}_linux-amd64.deb \ + && sudo dpkg -i ${{ runner.temp }}/hugo.deb + - name: Install Dart Sass + run: sudo snap install dart-sass + - name: Checkout + uses: actions/checkout@v6 + + - name: Install Node.js dependencies + working-directory: doc/ + run: npm ci + - name: Build with Hugo + working-directory: doc/ + env: + HUGO_CACHEDIR: ${{ runner.temp }}/hugo_cache + HUGO_ENVIRONMENT: production + TZ: America/Los_Angeles + run: | + hugo \ + --gc \ + --minify \ + --destination ${{ runner.temp }}/website \ + --baseURL "/" + - uses: actions/upload-artifact@v7 + with: + name: website + path: ${{ runner.temp }}/website + + build-api-c: + name: Build C API docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Build C docs + run: | + set -x + sudo apt-get update -q + sudo apt-get install -q -y cmake liblzma-dev libsnappy-dev libjansson-dev zlib1g-dev pkg-config asciidoc source-highlight libsource-highlight-dev + cd lang/c + ./build.sh clean docs + - uses: actions/upload-artifact@v7 + with: + name: api-c + path: build/c/docs + + build-api-cpp: + name: Build C++ API docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Build C++ docs + run: | + set -x + sudo apt-get update -q + sudo apt-get install -q -y gcc g++ libboost-all-dev cmake doxygen + cd lang/c++ + ./build.sh clean doc + - uses: actions/upload-artifact@v7 + with: + name: api-c++ + path: lang/c++/doc/html + + build-api-csharp: + name: Build C# API docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Build C# docs + run: | + set -x + sudo apt-get update -q + sudo apt-get install -q -y wget libzstd-dev libicu-dev doxygen + sudo wget https://dot.net/v1/dotnet-install.sh + bash ./dotnet-install.sh --channel "8.0" --install-dir "$HOME/.dotnet" + cd lang/csharp + mkdir -p build/doc + doxygen Avro.dox + - uses: actions/upload-artifact@v7 + with: + name: api-csharp + path: lang/csharp/build/doc/html + + build-api-java: + name: Build Java API docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Cache Local Maven Repository + uses: actions/cache@v5 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: 'Setup Maven' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.9 + + - name: Setup Temurin JDK + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 11 + 17 + 21 + + - name: Build Java docs + run: | + set -x + cd lang/java + ./build.sh dist + - uses: actions/upload-artifact@v7 + with: + name: api-java + path: lang/java/target/reports/apidocs + + build-api-python: + name: Build Python API docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: 3.11 + + - name: Setup uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + + - name: Build docs + working-directory: lang/py + run: ./build.sh doc + + - uses: actions/upload-artifact@v7 + with: + name: api-python + path: lang/py/docs/build/ + + build-api-rust: + name: Build Rust API docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + repository: 'apache/avro-rs' + + - name: Rust Toolchain + uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable + with: + toolchain: stable + + - name: Build Rustdocs + run: | + set -x + cargo doc --all-features + - uses: actions/upload-artifact@v7 + with: + name: api-rust + path: target/doc + + + push-website: + name: Push website + needs: [build-website, build-api-c, build-api-cpp, build-api-csharp, build-api-java, build-api-rust, build-api-python] + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Install dependencies + run: | + set -x + sudo apt-get update -q + sudo apt-get install -q -y subversion + + - name: Download website + uses: actions/download-artifact@v8 + with: + name: website + path: ${{ runner.temp }}/website + + - name: Download api-c + uses: actions/download-artifact@v8 + with: + name: api-c + path: api-c + + - name: Download api-c++ + uses: actions/download-artifact@v8 + with: + name: api-c++ + path: api-c++ + + - name: Download api-csharp + uses: actions/download-artifact@v8 + with: + name: api-csharp + path: api-csharp + + - name: Download api-java + uses: actions/download-artifact@v8 + with: + name: api-java + path: api-java + + - name: Download api-python + uses: actions/download-artifact@v8 + with: + name: api-python + path: api-python + + - name: Download api-rust + uses: actions/download-artifact@v8 + with: + name: api-rust + path: api-rust + + - name: Copy the generated HTML + run: | + set -x + + WEBSITE_API=${{ runner.temp }}/website/docs/++version++/api + mkdir -p $WEBSITE_API/{c,cpp/html,csharp/html,java,py/html,rust} + + mv api-c/* $WEBSITE_API/c/ + mv api-c++/* $WEBSITE_API/cpp/html/ + mv api-csharp/* $WEBSITE_API/csharp/html/ + mv api-java/* $WEBSITE_API/java/ + mv api-python/* $WEBSITE_API/py/ + mv api-rust/* $WEBSITE_API/rust/ + rmdir api-c api-c++ api-csharp api-python api-rust api-java + + - name: Checkout old docs versions from Subversion + run: | + set -xe + svn checkout https://svn.apache.org/repos/asf/avro/site/publish/docs + rm -rf docs/.svn + cp -R docs/1* ${{ runner.temp }}/website/docs/ + rm -rf docs + + - name: Push the new website + run: | + set -ex + + ls -la ${{ runner.temp }}/website/docs/ + + git config --global user.email "dev@avro.apache.org" + git config --global user.name "Github Actions" + git checkout --orphan asf-site-staging + + git rm -rf * + + mv ${{ runner.temp }}/website/* . + echo "publish: + whoami: asf-site + " > .asf.yaml + touch .nojekyll + git add --all + git commit -m "Publish built website triggered by ${{ github.sha }}" + git switch asf-site + git reset --hard asf-site-staging + git push origin asf-site --force diff --git a/.github/workflows/java-publish-snapshot.yml b/.github/workflows/java-publish-snapshot.yml new file mode 100644 index 00000000000..c20d218e19e --- /dev/null +++ b/.github/workflows/java-publish-snapshot.yml @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. + +name: "Publish Snapshot to Maven" +on: + workflow_dispatch: + push: + branches: [ main ] + paths: + - .github/workflows/java-publish-snapshot.yml + - lang/java/** + - pom.xml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + publish-snapshot: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - name: Cache Local Maven Repository + uses: actions/cache@v5 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.8 + + - name: 'Deploy Maven snapshots' + env: + ASF_USERNAME: ${{ secrets.NEXUS_USER }} + ASF_PASSWORD: ${{ secrets.NEXUS_PW }} + run: | + echo "apache.snapshots.https$ASF_USERNAME$ASF_PASSWORD" > ${{runner.temp}}/settings.xml + mvn --settings ${{runner.temp}}/settings.xml -U -B -e -fae -ntp -PskipQuality deploy diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index c786eb6440e..42883591941 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -21,9 +21,11 @@ on: pull_request_target jobs: triage: + permissions: + contents: read + pull-requests: write runs-on: ubuntu-latest steps: - - uses: actions/labeler@v2 + - uses: actions/labeler@v6 with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" sync-labels: true diff --git a/.github/workflows/maven4.yml b/.github/workflows/maven4.yml new file mode 100644 index 00000000000..f19c6dcd355 --- /dev/null +++ b/.github/workflows/maven4.yml @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: 'Maven 4' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/maven4.yml + - lang/java/** + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + maven4: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - name: Cache Local Maven Repository + uses: actions/cache@v5 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Cache Maven 4 Build Cache + uses: actions/cache@v5 + with: + path: ~/.m2/build-cache + key: ${{ runner.os }}-maven-build-cache-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven-build-cache + + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: Setup Maven 4 + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 4.0.0-alpha-10 + + - name: Test + run: mvn clean verify diff --git a/.github/workflows/rat.yml b/.github/workflows/rat.yml index d3fa1868a46..96a9c0ec915 100644 --- a/.github/workflows/rat.yml +++ b/.github/workflows/rat.yml @@ -17,29 +17,42 @@ name: 'Rat' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true jobs: rat: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- - - name: Setup Java - uses: actions/setup-java@v2 + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.11' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 with: - distribution: 'adopt' - java-version: '11' + maven-version: 3.9.11 - name: Run Rat - run: mvn test -Dmaven.main.skip=true -Dmaven.test.skip=true -DskipTests=true -P rat -pl :avro-toplevel + run: mvn test -Dmaven.main.skip=true -Dmaven.test.skip=true -DskipTests=true -Dinvoker.skip=true -P rat -pl :avro-toplevel diff --git a/.github/workflows/spotless.yml b/.github/workflows/spotless.yml index 45c7e9de4d7..2b4f8bdedf8 100644 --- a/.github/workflows/spotless.yml +++ b/.github/workflows/spotless.yml @@ -17,32 +17,45 @@ name: 'Spotless' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/spotless.yml - lang/java/** +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: spotless: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- - - name: Setup Java - uses: actions/setup-java@v2 + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.11' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 with: - distribution: 'adopt' - java-version: '11' + maven-version: 3.9.11 - name: Run Spotless Check run: mvn spotless:check diff --git a/lang/rust/build.sh b/.github/workflows/test-docker.yml old mode 100755 new mode 100644 similarity index 61% rename from lang/rust/build.sh rename to .github/workflows/test-docker.yml index d9a24849c32..da7baedabb5 --- a/lang/rust/build.sh +++ b/.github/workflows/test-docker.yml @@ -1,5 +1,3 @@ -#!/bin/bash - # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -15,30 +13,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -set -e +name: 'Docker tests' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - 'share/docker/*' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true -cd `dirname "$0"` +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 -for target in "$@" -do - case "$target" in - clean) - cargo clean - ;; - lint) - cargo clippy --all-targets --all-features -- -Dclippy::all - ;; - test) - cargo test - ;; - dist) - cargo build --release --lib --all-features - cargo package - mkdir -p ../../dist/rust - cp target/package/avro-rs-*.crate ../../dist/rust - ;; - *) - echo "Usage: $0 {lint|test|dist|clean}" >&2 - exit 1 - esac -done + - name: Run Docker tests + shell: bash + run: ./build.sh docker-test diff --git a/.github/workflows/test-lang-c++.yml b/.github/workflows/test-lang-c++.yml index c7db3804fec..ab2a60a9a62 100644 --- a/.github/workflows/test-lang-c++.yml +++ b/.github/workflows/test-lang-c++.yml @@ -17,9 +17,9 @@ name: Test C++ on: workflow_dispatch: push: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] pull_request: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] paths: - '.github/workflows/test-lang-c\+\+.yml' - 'lang/c\+\+/**' @@ -28,14 +28,29 @@ defaults: run: working-directory: lang/c++ +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Install Dependencies - run: sudo apt-get install -qqy cppcheck libboost-all-dev libsnappy-dev cmake + run: sudo apt update && sudo apt-get install -qqy cppcheck libboost-all-dev libsnappy-dev libfmt-dev zlib1g-dev libzstd-dev cmake + + - name: Print Versions + run: | + gcc --version + cmake --version + cppcheck --version - name: Clean run: ./build.sh clean @@ -45,3 +60,9 @@ jobs: - name: Test run: ./build.sh test + + - name: Release build + run: | + mkdir -p build + cd build + cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release .. diff --git a/.github/workflows/test-lang-c.yml b/.github/workflows/test-lang-c.yml index 764a29364d6..e3afece7c55 100644 --- a/.github/workflows/test-lang-c.yml +++ b/.github/workflows/test-lang-c.yml @@ -17,9 +17,9 @@ name: Test C on: workflow_dispatch: push: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] pull_request: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] paths: - .github/workflows/test-lang-c.yml - lang/c/** @@ -28,14 +28,23 @@ defaults: run: working-directory: lang/c +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Install Dependencies - run: sudo apt-get install -qqy libjansson-dev libsnappy-dev + run: sudo apt-get update && sudo apt-get install -qqy libjansson-dev libsnappy-dev - name: Lint run: ./build.sh lint @@ -43,17 +52,40 @@ jobs: - name: Test run: ./build.sh test + - name: Check pkg-config + run: | + mkdir -p build + cd build + cmake .. + export PKG_CONFIG_PATH=./src + pkg-config --libs avro-c + - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.11' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.11 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . @@ -70,12 +102,18 @@ jobs: run: ./build.sh interop-data-test interop: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Install Dependencies run: | + sudo apt-get update && \ sudo apt-get install -qqy --no-install-recommends libbz2-dev \ libjansson-dev \ liblzma-dev \ @@ -83,16 +121,31 @@ jobs: libzstd-dev - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.11' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.11 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . diff --git a/.github/workflows/test-lang-csharp.yml b/.github/workflows/test-lang-csharp.yml index b1959009e64..af5ddbc8e0c 100644 --- a/.github/workflows/test-lang-csharp.yml +++ b/.github/workflows/test-lang-csharp.yml @@ -17,9 +17,9 @@ name: 'Test C#' on: workflow_dispatch: push: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] pull_request: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] paths: - .github/workflows/test-lang-csharp.yml - lang/csharp/** @@ -28,13 +28,34 @@ defaults: run: working-directory: lang/csharp +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 + + - name: Add libzstd + shell: bash + run: sudo apt-get install -y libzstd-dev + + - name: Install .NET SDKs + uses: actions/setup-dotnet@v5 + with: + dotnet-version: | + 6.0.x + 7.0.x + 8.0.x - - uses: actions/cache@v2 + - uses: actions/cache@v5 with: path: ~/.nuget/packages key: ${{ runner.os }}-nuget-${{ hashFiles('**/packages.lock.json') }} @@ -48,21 +69,53 @@ jobs: run: ./build.sh test interop: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 + + - name: Add libzstd + shell: bash + run: sudo apt-get install -y libzstd-dev + + - name: Install .NET SDKs + uses: actions/setup-dotnet@v5 + with: + dotnet-version: | + 6.0.x + 7.0.x + 8.0.x - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.11' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.11 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . diff --git a/.github/workflows/test-lang-java.yml b/.github/workflows/test-lang-java.yml index b76b9c3a5b8..97d5111692f 100644 --- a/.github/workflows/test-lang-java.yml +++ b/.github/workflows/test-lang-java.yml @@ -13,110 +13,161 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: 'Test Java' +name: "Test Java" on: workflow_dispatch: push: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] pull_request: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] paths: - - .github/workflows/test-lang-java.yml - - lang/java/** - - pom.xml + - .github/workflows/test-lang-java.yml + - lang/java/** + - pom.xml defaults: run: working-directory: lang/java +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - name: Java ${{ matrix.java }} Test - runs-on: ubuntu-latest + name: "Java Test" + runs-on: ${{ matrix.os }} strategy: matrix: - java: - - '8' - - '11' + os: + - ubuntu-latest + - ubuntu-24.04-arm steps: - - uses: actions/checkout@v2 + - name: "Checkout sourcecode" + uses: actions/checkout@v6 - - name: Cache Local Maven Repository - uses: actions/cache@v2 + - name: "Cache Local Maven Repository" + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- - - name: Setup Java - uses: actions/setup-java@v2 + - name: "Setup Temurin JDK 11, 17 & 21" + uses: actions/setup-java@v5 + with: + distribution: "temurin" + java-version: | + 11 + 17 + 21 + + - name: "Setup Maven 3.9.11" + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 with: - distribution: 'adopt' - java-version: ${{ matrix.java }} + maven-version: 3.9.11 + + - name: "Install Java Avro Toplevel" + working-directory: ./ + run: mvn -B install -PskipQuality -DskipTests - - name: Lint + - name: "Java Lint" run: ./build.sh lint - - name: Test + - name: "Java Test" run: ./build.sh test + - name: "Install Java Avro" + working-directory: . + run: mvn -B clean install -PskipQuality -DskipTests + + - name: "Test Reproducible Build" + working-directory: . + run: mvn clean verify -PskipQuality artifact:compare + interop: - name: Java ${{ matrix.java }} Interop - runs-on: ubuntu-latest + name: "Java Interop" + runs-on: ${{ matrix.os }} strategy: matrix: - java: - - '8' - - '11' + os: + - ubuntu-latest + - ubuntu-24.04-arm + steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - - name: Cache Local Maven Repository - uses: actions/cache@v2 + - name: "Cache Local Maven Repository" + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- - - name: Setup Java - uses: actions/setup-java@v2 + - name: "Setup Temurin JDK 11, 17 & 21" + uses: actions/setup-java@v5 + with: + distribution: "temurin" + java-version: | + 11 + 17 + 21 + + - name: "Setup Maven 3.9.11" + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.11 + + - name: "Setup Python for Generating Input Data" + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 with: - distribution: 'adopt' - java-version: ${{ matrix.java }} + python-version: "3.12" - - name: Setup Python for Generating Input Data - uses: actions/setup-python@v2 + - name: Setup uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 - - name: Apt Install Compression Libs Required by Python + - name: "Apt Install Compression Libs Required by Python" run: | + sudo apt-get update && \ sudo apt-get install -qqy --no-install-recommends libbz2-dev \ liblzma-dev \ libsnappy-dev \ libzstd-dev - - name: Install Python Dependencies - run: | - python3 -m pip install --upgrade pip setuptools tox-wheel - python3 -m pip install python-snappy zstandard + - name: "Install Python Dependencies" + working-directory: lang/py + run: uv sync --frozen - - name: Install Java Avro for Interop Test - working-directory: . - run: mvn -B install -DskipTests + - name: "Setup C# for Generating Interop Data" + uses: actions/setup-dotnet@v5 + with: + dotnet-version: | + 6.0.x + 7.0.x + 8.0.x - - name: Create Interop Data Directory + - name: "Create Interop Data Directory" working-directory: . run: mkdir -p build/interop/data - - name: Generate Interop Resources - working-directory: lang/java/avro - run: mvn -B -P interop-data-generate generate-resources - - - name: Generate Interop Data using Python + - name: "Generate Interop Data using Python" working-directory: lang/py run: ./build.sh interop-data-generate - - name: Run Interop Tests - working-directory: lang/java/ipc - run: mvn -B test -P interop-data-test + - name: "Generate Interop Data using C#" + working-directory: lang/csharp + run: ./build.sh interop-data-generate + + - name: "Install Java Avro for other tests" + working-directory: . + run: mvn -B install -PskipQuality + + - name: "Generate Interop Data using Java 11, 17 & 21" + working-directory: lang/java/interop-data-test + run: mvn -B verify -Pgenerate-test-data + + - name: "Run Interop Tests using Java 11, 17 & 21" + working-directory: lang/java/interop-data-test + run: mvn -B verify -Pcheck-test-data diff --git a/.github/workflows/test-lang-js.yml b/.github/workflows/test-lang-js.yml index 1f5bebce252..168a2a1174c 100644 --- a/.github/workflows/test-lang-js.yml +++ b/.github/workflows/test-lang-js.yml @@ -17,9 +17,9 @@ name: 'Test JavaScript' on: workflow_dispatch: push: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] pull_request: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] paths: - .github/workflows/test-lang-js.yml - lang/js/** @@ -28,23 +28,30 @@ defaults: run: working-directory: lang/js +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - name: Node ${{ matrix.node }} - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm node: - - 12 - - 14 + - 20 + - 22 + - 24 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Setup Node - uses: actions/setup-node@v2 + uses: actions/setup-node@v6 with: node-version: ${{ matrix.node }} - - uses: actions/cache@v2 + - uses: actions/cache@v5 with: path: ~/.npm key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} @@ -58,46 +65,55 @@ jobs: run: ./build.sh test interop: - name: Node ${{ matrix.node }} Interop - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm node: - - 12 - - 14 + - 20 + - 22 + - 24 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Setup Node - uses: actions/setup-node@v2 + uses: actions/setup-node@v6 with: node-version: ${{ matrix.node }} - - uses: actions/cache@v2 + - uses: actions/cache@v5 with: path: ~/.npm key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} restore-keys: | ${{ runner.os }}-node- - - name: Cache Local Maven Repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-maven- + - name: Setup Python for Generating Input Data + uses: actions/setup-python@v6 - - name: Install Java Avro for Interop Test - working-directory: . - run: mvn -B install -DskipTests + - name: Apt Install Compression Libs Required by Python + run: | + sudo apt-get update && \ + sudo apt-get install -qqy --no-install-recommends libbz2-dev \ + liblzma-dev \ + libsnappy-dev \ + libzstd-dev + + - name: Setup uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # 8.1.0 + + - name: Install Dependencies + working-directory: lang/py + run: uv sync --frozen - name: Create Interop Data Directory working-directory: . run: mkdir -p build/interop/data - - name: Generate Interop Resources - working-directory: lang/java/avro - run: mvn -B -P interop-data-generate generate-resources + - name: Generate Interop Data using Python + working-directory: lang/py + run: ./build.sh interop-data-generate - name: Generate Interop Data run: ./build.sh interop-data-generate diff --git a/.github/workflows/test-lang-perl.yml b/.github/workflows/test-lang-perl.yml index bed6c367023..b92a13f1855 100644 --- a/.github/workflows/test-lang-perl.yml +++ b/.github/workflows/test-lang-perl.yml @@ -17,9 +17,9 @@ name: 'Test Perl' on: workflow_dispatch: push: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] pull_request: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] paths: - .github/workflows/test-lang-perl.yml - lang/perl/** @@ -28,16 +28,22 @@ defaults: run: working-directory: lang/perl +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - name: Perl ${{ matrix.perl }} Tests - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm perl: - - '5.32' + - '5.32' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - uses: shogo82148/actions-setup-perl@v1 with: @@ -45,25 +51,19 @@ jobs: - name: Install Dependencies run: | - sudo apt-get -qqy install --no-install-recommends libjansson-dev \ - libcompress-raw-zlib-perl \ - libcpan-uploader-perl \ - libencode-perl \ - libio-string-perl \ - libjson-xs-perl \ - libmodule-install-perl \ - libmodule-install-readmefrompod-perl \ - libobject-tiny-perl \ - libperl-critic-perl \ - libsnappy-dev \ - libtest-exception-perl \ - libtest-pod-perl cpanm --mirror https://www.cpan.org/ install Compress::Zstd \ + Encode \ Error::Simple \ - Module::Install::Repository \ + JSON::MaybeXS \ + Module::Install \ + Module::Install::ReadmeFromPod \ + Object::Tiny \ + Perl::Critic \ Regexp::Common \ - Try::Tiny \ - inc::Module::Install + Test::Exception \ + Test::More \ + Test::Pod \ + Try::Tiny - name: Lint run: ./build.sh lint @@ -72,14 +72,16 @@ jobs: run: ./build.sh test interop: - name: Perl ${{ matrix.perl }} Interop - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm perl: - - '5.32' + - '5.32' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - uses: shogo82148/actions-setup-perl@v1 with: @@ -87,37 +89,48 @@ jobs: - name: Install Dependencies run: | - sudo apt-get -qqy install --no-install-recommends libcompress-raw-zlib-perl \ - libcpan-uploader-perl \ - libencode-perl \ - libio-string-perl \ - libjansson-dev \ - libjson-xs-perl \ - libmodule-install-perl \ - libmodule-install-readmefrompod-perl \ - libobject-tiny-perl \ - libsnappy-dev \ - libtest-exception-perl \ - libtest-pod-perl + sudo apt-get update && \ + sudo apt-get -qqy install --no-install-recommends libjansson-dev \ + libsnappy-dev cpanm --mirror https://www.cpan.org/ install Compress::Zstd \ + Encode \ Error::Simple \ - Module::Install::Repository \ + JSON::MaybeXS \ + Module::Install \ + Module::Install::ReadmeFromPod \ Object::Tiny \ Regexp::Common \ - Try::Tiny \ - inc::Module::Install + Test::Exception \ + Test::More \ + Test::Pod \ + Try::Tiny - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.11' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.11 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . diff --git a/.github/workflows/test-lang-php.yml b/.github/workflows/test-lang-php.yml index 1fc227f7f6c..e8e56d25a46 100644 --- a/.github/workflows/test-lang-php.yml +++ b/.github/workflows/test-lang-php.yml @@ -17,41 +17,51 @@ name: 'Test PHP' on: workflow_dispatch: push: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] pull_request: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] paths: - .github/workflows/test-lang-php.yml + - composer.json - lang/php/** defaults: run: working-directory: lang/php +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - name: PHP ${{ matrix.php }} Test - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm php: - - '7.3' - - '7.4' - - '8.0' + - '8.1' + - '8.2' + - '8.3' + - '8.4' + - '8.5' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Setup PHP uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php }} + tools: composer:2.9.3 - name: Get Composer Cache Directory id: composer-cache - run: echo "::set-output name=dir::$(composer config cache-files-dir)" + run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT - - uses: actions/cache@v2 + - uses: actions/cache@v5 with: path: ${{ steps.composer-cache.outputs.dir }} key: ${{ runner.os }}-composer-${{ hashFiles('**/composer.lock') }} @@ -59,40 +69,64 @@ jobs: ${{ runner.os }}-composer- - name: Lint + if: matrix.php == '8.1' run: ./build.sh lint + - name: Static analysis + run: ./build.sh phpstan + - name: Test run: ./build.sh test interop: - name: PHP ${{ matrix.php }} Interop - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm php: - - '7.3' - - '7.4' - - '8.0' + - '8.1' + - '8.2' + - '8.3' + - '8.4' + - '8.5' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Setup PHP uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php }} + tools: composer:2.9.3 - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.11' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.11 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . @@ -102,7 +136,7 @@ jobs: working-directory: lang/java/avro run: mvn -B -P interop-data-generate generate-resources - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 with: repository: kjdev/php-ext-zstd path: lang/php/php-ext-zstd @@ -118,7 +152,7 @@ jobs: echo "extension=zstd.so" | sudo tee -a /etc/php/${{ matrix.php }}/cli/conf.d/10-zstd.ini php -m - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 with: repository: kjdev/php-ext-snappy path: lang/php/php-ext-snappy diff --git a/.github/workflows/test-lang-py.yml b/.github/workflows/test-lang-py.yml index 19522c01b7e..1db39c4e069 100644 --- a/.github/workflows/test-lang-py.yml +++ b/.github/workflows/test-lang-py.yml @@ -13,46 +13,57 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: 'Test Python' +name: "Test Python" on: workflow_dispatch: push: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] pull_request: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] paths: - - .github/workflows/test-lang-py.yml - - lang/py/** + - .github/workflows/test-lang-py.yml + - lang/py/** defaults: run: working-directory: lang/py +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - name: Python ${{ matrix.python }} Tests - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm python: - - '3.9' - - '3.8' - - '3.7' - - '3.6' - - 'pypy-3.7' - - 'pypy-3.6' + - "3.14" + - "3.13" + - "3.12" + - "3.11" + - "3.10" + - "pypy-3.11" + - "pypy-3.10" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} + - name: Setup uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + - name: Apt Install Compression Libs run: | + sudo apt-get update && \ sudo apt-get install -qqy --no-install-recommends libbz2-dev \ libjansson-dev \ liblzma-dev \ @@ -60,44 +71,45 @@ jobs: libzstd-dev - name: Install Dependencies - run: | - python3 -m pip install --upgrade pip setuptools tox-wheel + run: uv sync --frozen - name: Lint - if: ${{ matrix.python == '3.9' }} - run: python3 -m tox -e lint + if: ${{ matrix.python == '3.10' }} + run: ./build.sh lint - name: Typechecks - if: ${{ matrix.python == '3.9' }} - run: python3 -m tox -e typechecks + if: ${{ matrix.python == '3.10' }} + run: ./build.sh typechecks - name: Test - run: python3 -m tox -e py + run: ./build.sh test interop: - name: Python ${{ matrix.python }} Interop - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm python: - - '3.9' - - '3.8' - - '3.7' - - '3.6' - - 'pypy-3.7' - - 'pypy-3.6' + - "3.13" + - "3.12" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} + - name: Setup uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + - name: Apt Install Compression Libs run: | + sudo apt-get update && \ sudo apt-get install -qqy --no-install-recommends libbz2-dev \ libjansson-dev \ liblzma-dev \ @@ -105,21 +117,34 @@ jobs: libzstd-dev - name: Install Dependencies - run: | - python3 -m pip install --upgrade pip setuptools tox-wheel - python3 -m pip install python-snappy zstandard + run: uv sync --frozen - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: "Setup Temurin JDK 8, 11, 17 & 21" + uses: actions/setup-java@v5 + with: + distribution: "temurin" + java-version: | + 8 + 11 + 17 + 21 + + - name: "Setup Maven 3.9.11" + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.11 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . diff --git a/.github/workflows/test-lang-ruby.yml b/.github/workflows/test-lang-ruby.yml index 8f4f5076b89..bf98f9ccb78 100644 --- a/.github/workflows/test-lang-ruby.yml +++ b/.github/workflows/test-lang-ruby.yml @@ -17,9 +17,9 @@ name: 'Test Ruby' on: workflow_dispatch: push: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] pull_request: - branches: [ master ] + branches: [main, branch-1.11, branch-1.12] paths: - .github/workflows/test-lang-ruby.yml - lang/ruby/** @@ -28,27 +28,35 @@ defaults: run: working-directory: lang/ruby +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - name: Ruby ${{ matrix.ruby }} Tests - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm ruby: - - '2.6' - - '2.7' - - '3.0' + - '2.7' + - '3.0' + - '3.1' + - '3.2' + - '3.3' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} - name: Install Dependencies - run: sudo apt-get install -qqy bundler libsnappy-dev + run: sudo apt-get update && sudo apt-get install -qqy libsnappy-dev - - uses: actions/cache@v2 + - uses: actions/cache@v5 with: path: .gem key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }} @@ -69,25 +77,29 @@ jobs: ./build.sh test interop: - name: Ruby ${{ matrix.ruby }} Interop - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: + os: + - ubuntu-latest + - ubuntu-24.04-arm ruby: - - '2.6' - - '2.7' - - '3.0' + - '2.7' + - '3.0' + - '3.1' + - '3.2' + - '3.3' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} - name: Install Dependencies - run: sudo apt-get install -qqy bundler libsnappy-dev + run: sudo apt-get update && sudo apt-get install -qqy libsnappy-dev - - uses: actions/cache@v2 + - uses: actions/cache@v5 with: path: .gem key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }} @@ -98,16 +110,31 @@ jobs: run: bundle config path .gem - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@v5 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.11' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.11 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . diff --git a/.github/workflows/test-lang-rust-audit.yml b/.github/workflows/test-lang-rust-audit.yml deleted file mode 100644 index efb3f1eaff7..00000000000 --- a/.github/workflows/test-lang-rust-audit.yml +++ /dev/null @@ -1,49 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Rust Security Audit -on: - workflow_dispatch: - push: - branches: [ master ] - pull_request: - branches: [ master ] - paths: - - .github/workflows/test-lang-rust-audit.yml - - lang/rust/Cargo.toml - - lang/rust/Cargo.lock - -defaults: - run: - working-directory: lang/rust - -jobs: - audit: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 - # Currently does not work. See https://github.com/actions-rs/audit-check/issues/194 - #- name: Rust Audit - # uses: actions-rs/audit-check@v1 - # with: - # token: ${{ secrets.GITHUB_TOKEN }} - # Install it manually - - name: Install Cargo Audit - run: cargo install cargo-audit - - name: Audit - run: cargo audit diff --git a/.github/workflows/test-lang-rust-ci.yml b/.github/workflows/test-lang-rust-ci.yml deleted file mode 100644 index 977ea110731..00000000000 --- a/.github/workflows/test-lang-rust-ci.yml +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Rust Continuous Integration -on: - workflow_dispatch: - push: - branches: [ master ] - pull_request: - branches: [ master ] - paths: - - .github/workflows/test-lang-rust-ci.yml - - lang/rust/** - -defaults: - run: - working-directory: lang/rust - -jobs: - ci: - runs-on: ubuntu-latest - strategy: - matrix: - rust: - - stable - - beta - - nightly - - 1.48.0 # MSRV - - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Rust Toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: ${{ matrix.rust }} - override: true - components: rustfmt - - - name: Rust Format - uses: actions-rs/cargo@v1 - with: - command: fmt - args: --manifest-path lang/rust/Cargo.toml --all -- --check - - - name: Rust Build - uses: actions-rs/cargo@v1 - with: - command: build - args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets - - - name: Rust Test - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets - - # because of https://github.com/rust-lang/cargo/issues/6669 - - name: Rust Test docs - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path lang/rust/Cargo.toml --doc diff --git a/.github/workflows/test-lang-rust-clippy.yml b/.github/workflows/test-lang-rust-clippy.yml deleted file mode 100644 index cedc5f5f042..00000000000 --- a/.github/workflows/test-lang-rust-clippy.yml +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Rust Clippy Check -on: - workflow_dispatch: - push: - branches: [ master ] - pull_request: - branches: [ master ] - paths: - - .github/workflows/test-lang-rust-clippy.yml - - lang/rust/** - -defaults: - run: - working-directory: lang/rust - -jobs: - clippy_check: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - components: clippy - override: true - - uses: actions-rs/clippy-check@v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} - args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets -- -Dclippy::all -Dunused_imports diff --git a/.gitignore b/.gitignore index fd46be4f46f..437a035f0b3 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,5 @@ test-output vendor composer.lock .phpunit.result.cache +.mvn/jvm.config # Maven JVM settings +**/*.run.xml # Intellij IDEA Run configurations diff --git a/.mvn/extensions.xml b/.mvn/extensions.xml new file mode 100644 index 00000000000..e2e84018d96 --- /dev/null +++ b/.mvn/extensions.xml @@ -0,0 +1,25 @@ + + + + + org.apache.maven.extensions + maven-build-cache-extension + 1.0.1 + + diff --git a/.travis/before_install.sh b/.travis/before_install.sh deleted file mode 100755 index db76c129165..00000000000 --- a/.travis/before_install.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -case "$TRAVIS_OS_NAME" in -"linux") - sudo apt-get -q update - sudo apt-get -q install --no-install-recommends -y curl git gnupg-agent locales pinentry-curses pkg-config rsync software-properties-common - sudo apt-get -q clean - sudo rm -rf /var/lib/apt/lists/* - - # Only Yetus 0.9.0+ supports `ADD` and `COPY` commands in Dockerfile - curl -L https://www-us.apache.org/dist/yetus/0.10.0/apache-yetus-0.10.0-bin.tar.gz | tar xvz -C /tmp/ - # A dirty workaround to disable the Yetus robot for TravisCI, - # since it'll cancel the changes that .travis/script.sh will do, - # even if the `--dirty-workspace` option is specified. - rm /tmp/apache-yetus-0.10.0/lib/precommit/robots.d/travisci.sh - ;; -"windows") - # Install all (latest) SDKs which are used by multi framework projects - choco install dotnetcore-2.1-sdk # .NET Core 2.1 - choco install dotnetcore-sdk # .NET Core 3.1 - choco install dotnet-sdk # .NET 5.0 - ;; -*) - echo "Invalid PLATFORM" - exit 1 - ;; -esac diff --git a/BUILD.md b/BUILD.md index c09994e67fb..217630b625d 100644 --- a/BUILD.md +++ b/BUILD.md @@ -4,21 +4,20 @@ The following packages must be installed before Avro can be built: - - Java: JDK 1.8, Maven 3 or better, protobuf-compile - - PHP: php7, phpunit, php7-gmp - - Python 3: 3.5 or greater + - Java: 11, 17 and 21 with the appropriate toolchain config, Maven 3.9.6 or better, protobuf-compile + - PHP: php8, phpunit, php8-gmp + - Python 3: 3.10 or greater, tox (tox will install other dependencies as needed) - C: gcc, cmake, asciidoc, source-highlight, Jansson, pkg-config - C++: cmake 3.7.2 or greater, g++, flex, bison, libboost-dev - C#: .NET Core 2.2 SDK - - JavaScript: Node 12.x+, nodejs, npm - - Ruby: Ruby 2.6 or greater, ruby-dev, gem, bundler, snappy + - JavaScript: Node 20.x+, nodejs, npm + - Ruby: Ruby 2.7 or greater, ruby-dev, gem, bundler, snappy - Perl: Perl 5.24.1 or greater, gmake, Module::Install, Module::Install::ReadmeFromPod, Module::Install::Repository, - Math::BigInt, JSON::XS, Try::Tiny, Regexp::Common, Encode, - IO::String, Object::Tiny, Compress::ZLib, Error::Simple, - Test::More, Test::Exception, Test::Pod + Math::BigInt, JSON::MaybeXS, Try::Tiny, Regexp::Common, Encode, + Object::Tiny, Compress::ZLib, Error::Simple, Test::More, + Test::Exception, Test::Pod - Apache Ant 1.7 - - Apache Forrest 0.9 (for documentation) - md5sum, sha1sum, used by top-level dist target ## Using docker @@ -44,7 +43,7 @@ The working directory in the container is mounted from your host. This allows you to access the files in your Avro development tree from the Docker container. -There are some additional `DOCKER_` environment variables described in +There are some additional `DOCKER_` environment variables described in [build.sh](./build.sh) that can be used to interact with the image using the build script. Some examples: @@ -59,6 +58,20 @@ DOCKER_IMAGE_NAME=avro-build:1.10.1-rc1 ./build.sh docker DOCKER_RUN_ENTRYPOINT="mvn --version" ./build.sh docker ``` +## Developing inside a Container (Visual Studio Code Devcontainer) + +Requirement: + - [Visual Studio Code](https://code.visualstudio.com/) + - [Remote Development extension pack](https://aka.ms/vscode-remote/download/extension) + - Docker + - Windows: [Docker Desktop](https://www.docker.com/products/docker-desktop) + - macOS: [Docker Desktop](https://www.docker.com/products/docker-desktop) + - Linux: [Docker CE/EE](https://docs.docker.com/install/#supported-platforms) and [Docker Compose](https://docs.docker.com/compose/install) + +Useful links: + - [Developing inside a Container](https://code.visualstudio.com/docs/remote/containers) + - [Going further with Dev Containers](https://microsoft.github.io/code-with-engineering-playbook/developer-experience/going-further/) + ## Building Once the requirements are installed (or from the Docker container), diff --git a/DIST_README.txt b/DIST_README.txt index 003751c75b5..279909f483e 100644 --- a/DIST_README.txt +++ b/DIST_README.txt @@ -9,6 +9,6 @@ This distribution contains the following files: - avro-doc-x.y.z.tar.gz contains Avro's pre-built documentation. - - the c/, cpp/, csharp/, java/, js/, perl/, php/, py/, and ruby/ + - the c/, cpp/, csharp/, java/, js/, perl/, php/, py/ and ruby/ subdirectories contain pre-built, language-specific binaries, bundles, etc. as conveniences. diff --git a/LICENSE.txt b/LICENSE.txt index 7e159a69bc2..99c377d4aa6 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -201,37 +201,6 @@ See the License for the specific language governing permissions and limitations under the License. ----------------------------------------------------------------------- -License for the Jansson C JSON parser used in the C implementation: - -Copyright (c) 2009-2011 Petri Lehtinen - -Some files include an additional copyright notice: -* lang/c/jansson/src/pack_unpack.c - Copyright (c) 2011 Graeme Smecher -* lang/c/jansson/test/suites/api/test_unpack.c - Copyright (c) 2011 Graeme Smecher -* lang/c/jansson/src/memory.c - Copyright (c) 2011 Basile Starynkevitch - -| Permission is hereby granted, free of charge, to any person obtaining a copy -| of this software and associated documentation files (the "Software"), to deal -| in the Software without restriction, including without limitation the rights -| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -| copies of the Software, and to permit persons to whom the Software is -| furnished to do so, subject to the following conditions: -| -| The above copyright notice and this permission notice shall be included in -| all copies or substantial portions of the Software. -| -| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -| THE SOFTWARE. - ---------------------------------------------------------------------- License for msinttypes.h and msstdint.h used in the C implementation: Source from: @@ -295,155 +264,6 @@ Copyright (C) 2006 Toni Ronkko | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | OTHER DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- -License for ivy-2.2.0.jar used in the python implementation: - -Apache License version 2.0 (see above) - ----------------------------------------------------------------------- -License for pyAntTasks-1.3.jar used in the python implementation: - -Apache License version 2.0 (see above) - ----------------------------------------------------------------------- -License for NUnit binary included with the C# implementation: -File: nunit.framework.dll - -| NUnit License -| -| Copyright Š 2002-2015 Charlie Poole -| Copyright Š 2002-2004 James W. Newkirk, Michael C. Two, Alexei A. Vorontsov -| Copyright Š 2000-2002 Philip A. Craig -| -| This software is provided 'as-is', without any express or implied warranty. In -| no event will the authors be held liable for any damages arising from the use -| of this software. -| -| Permission is granted to anyone to use this software for any purpose, including -| commercial applications, and to alter it and redistribute it freely, subject to -| the following restrictions: -| -| The origin of this software must not be misrepresented; you must not claim that -| you wrote the original software. If you use this software in a product, an -| acknowledgment (see the following) in the product documentation is required. -| -| Portions Copyright Š 2002-2012 Charlie Poole or Copyright Š 2002-2004 James W. -| Newkirk, Michael C. Two, Alexei A. Vorontsov or Copyright Š 2000-2002 Philip A. -| Craig -| -| Altered source versions must be plainly marked as such, and must not be -| misrepresented as being the original software. -| -| This notice may not be removed or altered from any source distribution. -| License Note -| -| This license is based on the open source zlib/libpng license. The idea was to -| keep the license as simple as possible to encourage use of NUnit in free and -| commercial applications and libraries, but to keep the source code together and -| to give credit to the NUnit contributors for their efforts. While this license -| allows shipping NUnit in source and binary form, if shipping a NUnit variant is -| the sole purpose of your product, please let us know. - ----------------------------------------------------------------------- -License for the Json.NET binary included with the C# implementation: -File: Newtonsoft.Json.dll - -Copyright (c) 2007 James Newton-King - -| Permission is hereby granted, free of charge, to any person obtaining -| a copy of this software and associated documentation files (the -| "Software"), to deal in the Software without restriction, including -| without limitation the rights to use, copy, modify, merge, publish, -| distribute, sublicense, and/or sell copies of the Software, and to -| permit persons to whom the Software is furnished to do so, subject to -| the following conditions: -| -| The above copyright notice and this permission notice shall be -| included in all copies or substantial portions of the Software. -| -| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -| EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -| MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -| NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -| LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -| OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -| WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ----------------------------------------------------------------------- -License for the Castle Core binary included with the C# implementation: -File: Castle.Core.dll - -Copyright (c) 2004-2015 Castle Project - -License: Apache License version 2.0 (see above) -URL: https://opensource.org/licenses/Apache-2.0 - ----------------------------------------------------------------------- -License for the log4net binary included with the C# implementation: -File: log4net.dll - -Copyright 2004-2015 The Apache Software Foundation. - -License: Apache License version 2.0 (see above) - ----------------------------------------------------------------------- -License for the m4 macros used by the C++ implementation: - -Files: -* lang/c++/m4/m4_ax_boost_system.m4 - Copyright (c) 2008 Thomas Porschberg - Copyright (c) 2008 Michael Tindal - Copyright (c) 2008 Daniel Casimiro -* lang/c++/m4/m4_ax_boost_asio.m4 - Copyright (c) 2008 Thomas Porschberg - Copyright (c) 2008 Pete Greenwell -* lang/c++/m4/m4_ax_boost_filesystem.m4 - Copyright (c) 2009 Thomas Porschberg - Copyright (c) 2009 Michael Tindal - Copyright (c) 2009 Roman Rybalko -* lang/c++/m4/m4_ax_boost_thread.m4 - Copyright (c) 2009 Thomas Porschberg - Copyright (c) 2009 Michael Tindal -* lang/c++/m4/m4_ax_boost_regex.m4 - Copyright (c) 2008 Thomas Porschberg - Copyright (c) 2008 Michael Tindal -* lang/c++/m4/m4_ax_boost_base.m4 - Copyright (c) 2008 Thomas Porschberg - -License text: -| Copying and distribution of this file, with or without modification, are -| permitted in any medium without royalty provided the copyright notice -| and this notice are preserved. This file is offered as-is, without any -| warranty. - ----------------------------------------------------------------------- -License for the AVRO_BOOT_NO_TRAIT code in the C++ implementation: -File: lang/c++/api/Boost.hh - -| Boost Software License - Version 1.0 - August 17th, 2003 -| -| Permission is hereby granted, free of charge, to any person or organization -| obtaining a copy of the software and accompanying documentation covered by -| this license (the "Software") to use, reproduce, display, distribute, -| execute, and transmit the Software, and to prepare derivative works of the -| Software, and to permit third-parties to whom the Software is furnished to -| do so, all subject to the following: -| -| The copyright notices in the Software and this entire statement, including -| the above license grant, this restriction and the following disclaimer, -| must be included in all copies of the Software, in whole or in part, and -| all derivative works of the Software, unless such copies or derivative -| works are solely in the form of machine-executable object code generated by -| a source language processor. -| -| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -| FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -| SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -| FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -| ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -| DEALINGS IN THE SOFTWARE. - ---------------------------------------------------------------------- License for jquery.tipsy.js, tipsy.js, and tipsy.css used by the Java IPC implementation: diff --git a/NOTICE.txt b/NOTICE.txt index 737629b09ba..cd9dd5de0d5 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,61 +1,14 @@ Apache Avro -Copyright 2010-2019 The Apache Software Foundation +Copyright 2010-2025 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (https://www.apache.org/). -NUnit license acknowledgement: - -| Portions Copyright Š 2002-2012 Charlie Poole or Copyright Š 2002-2004 James -| W. Newkirk, Michael C. Two, Alexei A. Vorontsov or Copyright Š 2000-2002 -| Philip A. Craig - -Based upon the representations of upstream licensors, it is understood that -portions of the mapreduce API included in the Java implementation are licensed -from various contributors under one or more contributor license agreements to -Odiago, Inc. and were then contributed by Odiago to Apache Avro, which has now -made them available under the Apache 2.0 license. The original file header text -is: - -| Licensed to Odiago, Inc. under one or more contributor license -| agreements. See the NOTICE file distributed with this work for -| additional information regarding copyright ownership. Odiago, Inc. -| licenses this file to you under the Apache License, Version 2.0 -| (the "License"); you may not use this file except in compliance -| with the License. You may obtain a copy of the License at -| -| https://www.apache.org/licenses/LICENSE-2.0 -| -| Unless required by applicable law or agreed to in writing, software -| distributed under the License is distributed on an "AS IS" BASIS, -| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -| implied. See the License for the specific language governing -| permissions and limitations under the License. - The Odiago NOTICE at the time of the contribution: | This product includes software developed by Odiago, Inc. | (https://www.wibidata.com). -Apache Ivy includes the following in its NOTICE file: - -| Apache Ivy -| Copyright 2007-2010 The Apache Software Foundation -| -| This product includes software developed by -| The Apache Software Foundation (https://www.apache.org/). -| -| Portions of Ivy were originally developed by -| Jayasoft SARL (http://www.jayasoft.fr/) -| and are licensed to the Apache Software Foundation under the -| "Software Grant License Agreement" -| -| SSH and SFTP support is provided by the JCraft JSch package, -| which is open source software, available under -| the terms of a BSD style license. -| The original software and related information is available -| at http://www.jcraft.com/jsch/. - Apache Log4Net includes the following in its NOTICE file: | Apache log4net diff --git a/README.md b/README.md index 472656a3eb4..19bcc166922 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,12 @@ -# Apache Avroâ„ĸ +> [!IMPORTANT] +> The Rust SDK is moving to https://github.com/apache/avro-rs. Please use it for [new issues](https://github.com/apache/avro-rs/issues/new) + and [pull requests](https://github.com/apache/avro-rs/pulls)! + + Apache Avroâ„ĸAvro Logo +============ + +### Current CI status (Github servers) [![test c][test c img]][test c] [![test c#][test c# img]][test c#] [![test c++][test c++ img]][test c++] @@ -10,10 +17,7 @@ [![test python][test python img]][test python] [![test php][test php img]][test php] -[![rust continuous integration][rust continuous integration img]][rust continuous integration] -[![rust clippy check][rust clippy check img]][rust clippy check] -[![rust security audit][rust security audit img]][rust security audit] - +### Current CodeQL status [![codeql c#][codeql c# img]][codeql c#] [![codeql java][codeql java img]][codeql java] [![codeql javascript][codeql javascript img]][codeql javascript] @@ -31,6 +35,7 @@ To contribute to Avro, please read: https://cwiki.apache.org/confluence/display/AVRO/How+To+Contribute + [test c]: https://github.com/apache/avro/actions/workflows/test-lang-c.yml @@ -43,10 +48,6 @@ To contribute to Avro, please read: [test python]: https://github.com/apache/avro/actions/workflows/test-lang-py.yml [test php]: https://github.com/apache/avro/actions/workflows/test-lang-php.yml -[rust continuous integration]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml -[rust clippy check]: https://github.com/apache/avro/actions/workflows/test-lang-rust-clippy.yml -[rust security audit]: https://github.com/apache/avro/actions/workflows/test-lang-rust-audit.yml - [codeql c#]: https://github.com/apache/avro/actions/workflows/codeql-csharp-analysis.yml [codeql java]: https://github.com/apache/avro/actions/workflows/codeql-java-analysis.yml [codeql javascript]: https://github.com/apache/avro/actions/workflows/codeql-js-analysis.yml @@ -62,11 +63,18 @@ To contribute to Avro, please read: [test python img]: https://github.com/apache/avro/actions/workflows/test-lang-py.yml/badge.svg [test php img]: https://github.com/apache/avro/actions/workflows/test-lang-php.yml/badge.svg -[rust continuous integration img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml/badge.svg -[rust clippy check img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-clippy.yml/badge.svg -[rust security audit img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-audit.yml/badge.svg - [codeql c# img]: https://github.com/apache/avro/actions/workflows/codeql-csharp-analysis.yml/badge.svg [codeql java img]: https://github.com/apache/avro/actions/workflows/codeql-java-analysis.yml/badge.svg [codeql javascript img]: https://github.com/apache/avro/actions/workflows/codeql-js-analysis.yml/badge.svg [codeql python img]: https://github.com/apache/avro/actions/workflows/codeql-py-analysis.yml/badge.svg + +You can use devcontainers to develop Avro: + +* [![Open in Visual Studio Code](https://img.shields.io/static/v1?label=&message=Open%20in%20Visual%20Studio%20Code&color=blue&logo=visualstudiocode&style=flat)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/apache/avro) +* [![Open in Github Codespaces](https://img.shields.io/static/v1?label=&message=Open%20in%20Github%20Codespaces&color=2f362d&logo=github)](https://codespaces.new/apache/avro?quickstart=1&hide_repo_select=true) + + +### Trademark & logos +ApacheÂŽ, Apache Avro and the Apache Avro airplane logo are trademarks of The Apache Software Foundation. + +The Apache Avro airplane logo on this page has been designed by [Emma Kellam](https://github.com/emmak3l) for use by this project. diff --git a/build.sh b/build.sh index 0a5f158614a..67cbd23c39e 100755 --- a/build.sh +++ b/build.sh @@ -39,6 +39,9 @@ change_java_version() { # =========================================================================== +# This might not have been sourced if the entrypoint is not bash +[[ -f "$HOME/.cargo/env" ]] && . "$HOME/.cargo/env" + set -xe cd "${0%/*}" @@ -53,6 +56,9 @@ DOCKER_BUILD_XTRA_ARGS=${DOCKER_BUILD_XTRA_ARGS-} # Override the docker image name used. DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME-} +# When building a docker container, these are the files that will sent and available. +DOCKER_EXTRA_CONTEXT="lang/ruby/Gemfile lang/ruby/avro.gemspec lang/ruby/Manifest share/VERSION.txt" + usage() { echo "Usage: $0 {lint|test|dist|sign|clean|veryclean|docker [--args \"docker-args\"]|rat|githooks|docker-test}" exit 1 @@ -101,14 +107,13 @@ do (cd lang/ruby; ./build.sh lint test) (cd lang/php; ./build.sh lint test) (cd lang/perl; ./build.sh lint test) - (cd lang/rust; ./build.sh lint test) (cd lang/py; ./build.sh interop-data-generate) (cd lang/c; ./build.sh interop-data-generate) #(cd lang/c++; make interop-data-generate) (cd lang/csharp; ./build.sh interop-data-generate) (cd lang/js; ./build.sh interop-data-generate) - (cd lang/ruby; rake generate_interop) + (cd lang/ruby; ./build.sh interop-data-generate) (cd lang/php; ./build.sh interop-data-generate) (cd lang/perl; ./build.sh interop-data-generate) @@ -119,7 +124,7 @@ do #(cd lang/c++; make interop-data-test) (cd lang/csharp; ./build.sh interop-data-test) (cd lang/js; ./build.sh interop-data-test) - (cd lang/ruby; rake interop) + (cd lang/ruby; ./build.sh interop-data-test) (cd lang/php; ./build.sh test-interop) (cd lang/perl; ./build.sh interop-data-test) @@ -151,6 +156,9 @@ do # runs RAT on artifacts mvn -N -P rat antrun:run verify + # install java artifacts required by other builds and interop tests + mvn -B install -DskipTests + mkdir -p dist (cd build; tar czf "../dist/${SRC_DIR}.tar.gz" "${SRC_DIR}") @@ -167,14 +175,20 @@ do (cd lang/js; ./build.sh dist) (cd lang/ruby; ./build.sh dist) (cd lang/php; ./build.sh dist) - (cd lang/rust; ./build.sh dist) mkdir -p dist/perl (cd lang/perl; ./build.sh dist) cp "lang/perl/Avro-$VERSION.tar.gz" dist/perl/ # build docs - (cd doc; ant) + cp -r doc/ build/staging-web/ + find build/staging-web/ -type f -print0 | xargs -0 sed -r -i "s#\+\+version\+\+#${VERSION,,}#g" + mkdir -p build/staging-web/public/docs/ + mv build/staging-web/content/en/docs/++version++ build/staging-web/public/docs/"${VERSION,,}" + (cd build/staging-web/ && npm install && hugo --gc --minify) + cp -R build/staging-web/public/docs/"${VERSION,,}"/* "build/$DOC_DIR/" + cp -R "build/$DOC_DIR/api" build/staging-web/public/docs/"${VERSION,,}"/ + ( cd build/staging-web/public/docs/; ln -s "${VERSION,,}" current ) # add LICENSE and NOTICE for docs mkdir -p "build/$DOC_DIR" cp doc/LICENSE "build/$DOC_DIR" @@ -198,7 +212,13 @@ do \! -name '*.asc' \! -name '*.txt' ); do (cd "${f%/*}" && shasum -a 512 "${f##*/}") > "$f.sha512" - gpg --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + + if [ -z "$GPG_LOCAL_USER" ]; then + gpg --pinentry-mode loopback --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + else + gpg --pinentry-mode loopback --local-user="$GPG_LOCAL_USER" --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + fi + done set -x @@ -206,7 +226,7 @@ do clean) rm -rf build dist - (cd doc; ant clean) + rm -rf doc/public/ doc/resources/ doc/node_modules/ doc/package-lock.json doc/.hugo_build.lock (mvn -B clean) rm -rf lang/java/*/userlogs/ @@ -229,12 +249,11 @@ do (cd lang/perl; ./build.sh clean) - (cd lang/rust; ./build.sh clean) ;; veryclean) rm -rf build dist - (cd doc; ant clean) + rm -rf doc/public/ doc/resources/ doc/node_modules/ doc/package-lock.json doc/.hugo_build.lock (mvn -B clean) rm -rf lang/java/*/userlogs/ @@ -257,19 +276,17 @@ do (cd lang/perl; ./build.sh clean) - (cd lang/rust; ./build.sh clean) - rm -rf lang/c++/build rm -rf lang/js/node_modules rm -rf lang/perl/inc/ rm -rf lang/ruby/.gem/ rm -rf lang/ruby/Gemfile.lock - rm -rf lang/py/lib/ivy-2.2.0.jar rm -rf lang/csharp/src/apache/ipc.test/bin/ rm -rf lang/csharp/src/apache/ipc.test/obj ;; docker) + echo "NB: for Docker Desktop users on MacOS, the default file sharing implementation (VirtioFS) has issues with some operations. You should better use gRPC FUSE or osxfs." if [[ $1 =~ ^--args ]]; then DOCKER_RUN_XTRA_ARGS=$2 shift 2 @@ -286,15 +303,22 @@ do DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-"avro-build-$USER_NAME:latest"} { cat share/docker/Dockerfile - grep -vF 'FROM avro-build-ci' share/docker/DockerfileLocal - echo "ENV HOME /home/$USER_NAME" + echo "ENV HOME=/home/$USER_NAME" + echo "RUN getent passwd $USER_ID && userdel \$(getent passwd $USER_ID | cut -d: -f1)" echo "RUN getent group $GROUP_ID || groupadd -g $GROUP_ID $USER_NAME" - echo "RUN getent passwd $USER_ID || useradd -g $GROUP_ID -u $USER_ID -k /root -m $USER_NAME" + echo "RUN useradd -N -g $GROUP_ID -u $USER_ID -k /root -m $USER_NAME" + echo "RUN mkdir -p /home/$USER_NAME/.m2/repository" + echo "RUN chown -R --reference=/home/$USER_NAME /home/$USER_NAME/.m2/" } > Dockerfile + + if [ -z "$BUILDPLATFORM" ]; then + export BUILDPLATFORM=$(docker info --format "{{.OSType}}/{{.Architecture}}") + fi + # Include the ruby gemspec for preinstallation. # shellcheck disable=SC2086 - tar -cf- lang/ruby/Gemfile Dockerfile | docker build $DOCKER_BUILD_XTRA_ARGS -t "$DOCKER_IMAGE_NAME" - + tar -cf- Dockerfile $DOCKER_EXTRA_CONTEXT | DOCKER_BUILDKIT=1 docker build $DOCKER_BUILD_XTRA_ARGS --build-arg="BUILDPLATFORM=${BUILDPLATFORM}" -t "$DOCKER_IMAGE_NAME" - rm Dockerfile - # By mapping the .m2 directory you can do an mvn install from + # By mapping the .m2/repository directory you can do an mvn install from # within the container and use the result on your normal # system. And this also is a significant speedup in subsequent # builds because the dependencies are downloaded only once. @@ -306,10 +330,10 @@ do # extra second before the changes are available within the docker container. # shellcheck disable=SC2086 docker run --rm -t -i \ - --env "JAVA=${JAVA:-8}" \ + --env "JAVA=${JAVA:-21}" \ --user "${USER_NAME}" \ --volume "${HOME}/.gnupg:/home/${USER_NAME}/.gnupg" \ - --volume "${HOME}/.m2:/home/${USER_NAME}/.m2${DOCKER_MOUNT_FLAG}" \ + --volume "${HOME}/.m2/repository:/home/${USER_NAME}/.m2/repository${DOCKER_MOUNT_FLAG}" \ --volume "${PWD}:/home/${USER_NAME}/avro${DOCKER_MOUNT_FLAG}" \ --workdir "/home/${USER_NAME}/avro" \ ${DOCKER_RUN_XTRA_ARGS} "$DOCKER_IMAGE_NAME" ${DOCKER_RUN_ENTRYPOINT} @@ -327,9 +351,15 @@ do ;; docker-test) - tar -cf- share/docker/Dockerfile lang/ruby/Gemfile | - docker build -t avro-test -f share/docker/Dockerfile - - docker run --rm -v "${PWD}:/avro${DOCKER_MOUNT_FLAG}" --env "JAVA=${JAVA:-8}" avro-test /avro/share/docker/run-tests.sh + if [ -z "$BUILDPLATFORM" ]; then + export BUILDPLATFORM=$(docker info --format "{{.OSType}}/{{.Architecture}}") + fi + tar -cf- share/docker/Dockerfile $DOCKER_EXTRA_CONTEXT | + DOCKER_BUILDKIT=1 docker build -t avro-test --build-arg BUILDPLATFORM="${BUILDPLATFORM}" -f share/docker/Dockerfile - + docker run --rm \ + --volume "${PWD}:/avro${DOCKER_MOUNT_FLAG}" \ + --volume "${PWD}/share/docker/m2/:/root/.m2/" \ + --env "JAVA=${JAVA:-11}" avro-test /avro/share/docker/run-tests.sh ;; *) diff --git a/composer.json b/composer.json index e5f1313aeba..ec5d2a3f37b 100644 --- a/composer.json +++ b/composer.json @@ -3,11 +3,73 @@ "description": "Apache Avroâ„ĸ is a data serialization system.", "minimum-stability": "stable", "license": "Apache-2.0", + "homepage": "http://avro.apache.org", + "type": "library", + "keywords": [ + "avro", + "data", + "serialization" + ], + "readme": "README.md", + "authors": [ + { + "name": "Apache Avro Developers", + "email": "dev@avro.apache.org", + "homepage": "http://avro.apache.org" + } + ], + "support": { + "email": "dev@avro.apache.org", + "issues": "https://issues.apache.org/jira/browse/AVRO" + }, "require": { - "beberlei/composer-monorepo-plugin": "0.16.5" + "php": "^8.1" }, + "deps": [ + "vendor/phpunit/phpunit" + ], "require-dev": { - "phpunit/phpunit": "^9.1", - "squizlabs/php_codesniffer": "^3.5" + "phpunit/phpunit": "^10.5", + "php-mock/php-mock-phpunit": "^2.10", + "ext-json": "*", + "ext-xml": "*", + "ext-curl": "*", + "ext-pcntl": "*", + "rector/rector": "^2.2", + "friendsofphp/php-cs-fixer": "^3.89", + "phpstan/phpstan": "^2.1" + }, + "autoload": { + "psr-4": { + "Apache\\Avro\\": "lang/php/lib/" + } + }, + "autoload-dev": { + "psr-4": { + "Apache\\Avro\\Tests\\": "lang/php/test/" + } + }, + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "archive": { + "exclude": [ + "*", + ".*", + "!/CHANGES.md", + "!/LICENSE", + "!/NOTICE", + "!/README.md", + "!/composer.json", + "!/lang/php/README.md", + "!/lang/php/lib" + ] + }, + "config": { + "allow-plugins": { + "beberlei/composer-monorepo-plugin": true + } } } diff --git a/doc/.gitignore b/doc/.gitignore index 567609b1234..48b779e950c 100644 --- a/doc/.gitignore +++ b/doc/.gitignore @@ -1 +1,4 @@ -build/ +public/ +resources/ +node_modules/ +.hugo_build.lock diff --git a/doc/LICENSE b/doc/LICENSE index af6b6731242..e0f8f08e158 100644 --- a/doc/LICENSE +++ b/doc/LICENSE @@ -306,12 +306,6 @@ Prototype JavaScript framework, version 1.4.0_pre4 For a copy of the MIT license text, see above. ----------------------------------------------------------------------- -License for Apache Forrest (skin), included in the Avro documentation: - -Copyright: 2009-2015 The Apache Software Foundation -License: https://www.apache.org/licenses/LICENSE-2.0 (see above) - ---------------------------------------------------------------------- License for Doxygen-generated documentation for the C++ and C# implementations: diff --git a/doc/NOTICE b/doc/NOTICE index 8b7999217fd..7320bb0adfc 100644 --- a/doc/NOTICE +++ b/doc/NOTICE @@ -1,5 +1,5 @@ Apache Avro -Copyright 2010-2015 The Apache Software Foundation +Copyright 2010-2022 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (https://www.apache.org/). @@ -26,16 +26,9 @@ is: | implied. See the License for the specific language governing | permissions and limitations under the License. -The Odiago NOTICE at the time of the contribution: - -| This product includes software developed by Odiago, Inc. -| (https://www.wibidata.com). - -The documentation contains the default Apache Forrest skin. -Apache Forrest includes the following in its NOTICE file: - -| Apache Forrest -| Copyright 2002-2007 The Apache Software Foundation. +|-------------------------------------------------------------------------- +| This product includes software developed by The Docsy Authors. +| (https://www.docsy.dev/). | | This product includes software developed at | The Apache Software Foundation (https://www.apache.org/). @@ -49,35 +42,3 @@ Apache Forrest includes the following in its NOTICE file: | Other accompanying products do not require attribution, so are not listed. | | ------------------------------------------------------------------------ -| This product includes software developed by the OpenSymphony Group -| http://www.opensymphony.com/ -| -| This product includes software developed for project Krysalis -| http://www.krysalis.org/ -| -| This product includes software developed by Andy Clark. -| https://people.apache.org/~andyc/neko/ -| -| This product includes software developed by the ExoLab Project -| https://www.exolab.org/ -| -| This product includes software developed by TouchGraph LLC -| https://www.touchgraph.com/ -| -| This product includes software developed by Marc De Scheemaecker -| http://nanoxml.cyberelf.be/ -| -| This product includes software developed by the ANTLR project -| https://www.antlr.org/ -| -| This product includes software developed by Chaperon -| http://chaperon.sourceforge.net/ -| -| This product includes software developed by Sal Mangano (included in the XSLT Cookbook published by O'Reilly) -| https://www.oreilly.com/catalog/xsltckbk/ -| -| This product includes software developed by The Werken Company. -| http://jaxen.werken.com/ -| -| This product includes software developed by the jfor project -| http://www.jfor.org/ diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 00000000000..acd139aa01a --- /dev/null +++ b/doc/README.md @@ -0,0 +1,51 @@ +# Apache Avro website + +This website is base on [Hugo](https://gohugo.io) and uses the [Docsy](https://www.docsy.dev/) theme. +Before building the website, you need to initialize submodules. + +```sh +hugo mod get -u +``` + +## Previewing the website locally + +```sh +# Serve the website dynamically using extended hugo: +hugo server --buildDrafts --buildFuture --bind 0.0.0.0 --navigateToChanged + +# You can do the same thing without installing hugo via docker. +# From the Avro root directory: +docker run --rm -v $(pwd):/src -p 1313:1313 jakejarvis/hugo-extended:latest --source doc/ server \ + --buildDrafts --buildFuture --bind 0.0.0.0 --navigateToChanged +``` + +## New release + +When a new version of Apache Avro is released: + +1. Change the value of `params.avroversion` in `config.toml` +2. Add a new entry to the `Releases` pages in the `Blog` section, for example: + +```sh +cp content/en/blog/releases/avro-1.12.0-released.md content/en/blog/releases/avro-1.13.0-released.md +``` + +### Upload the docs + +Copy the Markdown content from the release tar to the `doc/content/en/docs/1.12.0`: + +```sh +tar xvfz avro-src-1.12.0.tar.gz +``` + +Here we need to copy everything, except the `api/` directory to this repository. The markdown will be rendered using Hugo, and the API docs are already html, and will be served from the ASF SVN. The `api/` directory needs to be uploaded to SVN: + +```sh +svn co https://svn.apache.org/repos/asf/avro/site +cd site/publish/docs/ +mkdir 1.12.0 +cd 1.12.0 +mkdir api +cp -r ~/Desktop/avro-release-dist/avro-1.12.0/avro-doc-1.12.0/api/ api/ +svn commit -m "Avro 1.12.0 release" +``` diff --git a/doc/assets/images/logo-black-text.png b/doc/assets/images/logo-black-text.png new file mode 100644 index 00000000000..c593f891c1d Binary files /dev/null and b/doc/assets/images/logo-black-text.png differ diff --git a/doc/assets/images/logo-text.svg b/doc/assets/images/logo-text.svg new file mode 100644 index 00000000000..6f842518618 --- /dev/null +++ b/doc/assets/images/logo-text.svg @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + diff --git a/doc/assets/images/logo-white-text.png b/doc/assets/images/logo-white-text.png new file mode 100644 index 00000000000..29ad9ed4fd0 Binary files /dev/null and b/doc/assets/images/logo-white-text.png differ diff --git a/doc/assets/images/logo.png b/doc/assets/images/logo.png new file mode 100644 index 00000000000..5651de93f58 Binary files /dev/null and b/doc/assets/images/logo.png differ diff --git a/doc/assets/images/logo.svg b/doc/assets/images/logo.svg new file mode 100644 index 00000000000..beee014a3d5 --- /dev/null +++ b/doc/assets/images/logo.svg @@ -0,0 +1,27 @@ + + + + + + + + diff --git a/doc/assets/scss/PTMono-Regular.ttf b/doc/assets/scss/PTMono-Regular.ttf new file mode 100644 index 00000000000..b1983838c66 Binary files /dev/null and b/doc/assets/scss/PTMono-Regular.ttf differ diff --git a/doc/assets/scss/_styles_project.scss b/doc/assets/scss/_styles_project.scss new file mode 100644 index 00000000000..ec1ca5926d6 --- /dev/null +++ b/doc/assets/scss/_styles_project.scss @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +@font-face { + font-family: 'PT Mono'; + font-style: normal; + font-weight: 400; + font-display: swap; + src: url(./PTMono-Regular.ttf) format('truetype'); +} + +// Disable all github editing links for now +.td-page-meta--view { display: none !important; } +.td-page-meta--edit { display: none !important; } +.td-page-meta--child { display: none !important; } +.td-page-meta--issue { display: none !important; } +.td-page-meta--project-issue { display: none !important; } + +.navbar-brand { + font-family: "PT Mono", monospace; +} + +@media (max-width: 992px) { + + footer .row { + display: flex; + flex-direction: column; + align-items: center; + } + + footer .row > div { + width: 100%; + max-width: 100%; + text-align: center; + margin-bottom: 1.25rem; + } + + footer ul.list-inline { + display: flex; + justify-content: center; + flex-wrap: wrap; + } + + footer ul.list-inline li { + margin: 0.4rem; + } +} + diff --git a/doc/assets/scss/_variables_project.scss b/doc/assets/scss/_variables_project.scss new file mode 100644 index 00000000000..fb5495c8669 --- /dev/null +++ b/doc/assets/scss/_variables_project.scss @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +.dropdown-menu .show { + overflow-y: auto; + max-height: 700px; +} diff --git a/doc/build.xml b/doc/build.xml deleted file mode 100644 index d711608a36d..00000000000 --- a/doc/build.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - - - - - - - - - - - - <!ENTITY AvroVersion "${version}"> - - - - - - - - - - - - - - - - - - - - diff --git a/doc/config.toml b/doc/config.toml new file mode 100644 index 00000000000..fa3b0033676 --- /dev/null +++ b/doc/config.toml @@ -0,0 +1,427 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +baseURL = "/" +title = "Apache Avro" + +# Language settings +contentDir = "content/en" +defaultContentLanguage = "en" +defaultContentLanguageInSubdir = false +# Useful when translating. +enableMissingTranslationPlaceholders = true + +enableRobotsTXT = true + +# Will give values to .Lastmod etc. +enableGitInfo = true + +# Comment out to disable taxonomies in Docsy +# disableKinds = ["taxonomy", "taxonomyTerm"] + +# You can add your own taxonomies +[taxonomies] +tag = "tags" +category = "categories" + +[params.taxonomy] +# set taxonomyCloud = [] to hide taxonomy clouds +taxonomyCloud = ["tags", "categories"] + +# If used, must have same lang as taxonomyCloud +taxonomyCloudTitle = ["Tag Cloud", "Categories"] + +# set taxonomyPageHeader = [] to hide taxonomies on the page headers +taxonomyPageHeader = ["tags", "categories"] + + +# Highlighting config +pygmentsCodeFences = true +pygmentsUseClasses = false +# Use the new Chroma Go highlighter in Hugo. +pygmentsUseClassic = false +#pygmentsOptions = "linenos=table" +# See https://help.farbox.com/pygments.html +pygmentsStyle = "tango" + +# Configure how URLs look like per section. +[permalinks] +blog = "/:section/:year/:month/:day/:slug/" + +## Configuration for BlackFriday markdown parser: https://github.com/russross/blackfriday +[blackfriday] +plainIDAnchors = true +hrefTargetBlank = true +angledQuotes = false +latexDashes = true + +# Image processing configuration. +[imaging] +resampleFilter = "CatmullRom" +quality = 75 +anchor = "smart" + +[services] +[services.googleAnalytics] +# Comment out the next line to disable GA tracking. Also disables the feature described in [params.ui.feedback]. +# id = "UA-00000000-0" + +# Language configuration + +[languages.params] +[languages.en.params] +title = "Apache Avro" +description = "" +languageName ="English" +# Weight used for sorting. +weight = 1 + +[markup] + [markup.goldmark] + [markup.goldmark.renderer] + unsafe = true + [markup.highlight] + # See a complete list of available styles at https://xyproto.github.io/splash/docs/all.html + style = "tango" + # Uncomment if you want your chosen highlight style used for code blocks without a specified language + # guessSyntax = "true" + +# Everything below this are Site Params + +# Comment out if you don't want the "print entire section" link enabled. +[outputs] +section = ["HTML", "print", "RSS"] + +[params] +avroversion = "1.12.1" +copyright = "The Apache Software Foundation" +apache_foundation = "https://www.apache.org/" +apache_events_logo = "https://www.apache.org/events/current-event-234x60.png" +apache_events_url = "https://www.apache.org/events/current-event.html" +privacy_policy = "http://www.apache.org/foundation/policies/privacy.html" +license = "http://www.apache.org/licenses/" + +# First one is picked as the Twitter card image if not set on page. +# images = ["images/project-illustration.png"] + +# Menu title if your navbar has a versions selector to access old versions of your site. +# This menu appears only if you have at least one [params.versions] set. +version_menu = "Releases" + +# Flag used in the "version-banner" partial to decide whether to display a +# banner on every page indicating that this is an archived version of the docs. +# Set this flag to "true" if you want to display the banner. +archived_version = false + +# The version number for the version of the docs represented in this doc set. +# Used in the "version-banner" partial to display a version number for the +# current doc set. +version = "1.12.1" + +# A link to latest version of the docs. Used in the "version-banner" partial to +# point people to the main doc site. +url_latest_version = "https://avro.apache.org" + +# Repository configuration (URLs for in-page links to opening issues and suggesting changes) +github_repo = "https://github.com/apache/avro" +github_subdir = "doc" + +# An optional link to a related project repo. For example, the sibling repository where your product code lives. +github_project_repo = "https://github.com/apache/avro" + +# Specify a value here if your content directory is not in your repo's root directory +# github_subdir = "" + +# Uncomment this if you have a newer GitHub repo with "main" as the default branch, +# or specify a new value if you want to reference another branch in your GitHub links +# github_branch= "main" + +# Google Custom Search Engine ID. Remove or comment out to disable search. +# gcs_engine_id = "d72aa9b2712488cc3" + +# Enable Lunr.js offline search +offlineSearch = false + +# Enable syntax highlighting and copy buttons on code blocks with Prism +prism_syntax_highlighting = true + +# User interface configuration +[params.ui] +# Set to true to disable breadcrumb navigation. +breadcrumb_disable = false +# Set to true to disable the About link in the site footer +footer_about_disable = true +# Set to false if you don't want to display a logo (/assets/icons/logo.svg) in the top navbar +navbar_logo = true +# Set to true if you don't want the top navbar to be translucent when over a `block/cover`, like on the homepage. +navbar_translucent_over_cover_disable = false +# Enable to show the side bar menu in its compact state. +sidebar_menu_compact = false +# Set to true to hide the sidebar search box (the top nav search box will still be displayed if search is enabled) +sidebar_search_disable = true +sidebar_menu_foldable = true + +# Adds a H2 section titled "Feedback" to the bottom of each doc. The responses are sent to Google Analytics as events. +# This feature depends on [services.googleAnalytics] and will be disabled if "services.googleAnalytics.id" is not set. +# If you want this feature, but occasionally need to remove the "Feedback" section from a single page, +# add "hide_feedback: true" to the page's front matter. +[params.ui.feedback] +enable = false +# The responses that the user sees after clicking "yes" (the page was helpful) or "no" (the page was not helpful). +yes = 'Glad to hear it! Please tell us how we can improve.' +no = 'Sorry to hear that. Please tell us how we can improve.' + +# Adds a reading time to the top of each doc. +# If you want this feature, but occasionally need to remove the Reading time from a single page, +# add "hide_readingtime: true" to the page's front matter +[params.ui.readingtime] +enable = true + +[params.asf] +[[params.asf.links]] +name = "ASF Web Site" +url = "http://www.apache.org/" + +[[params.asf.links]] +name = "License" +url = "http://www.apache.org/licenses/" + +[[params.asf.links]] +name = "Donate" +url = "http://www.apache.org/foundation/sponsorship.html" + + +[[params.asf.links]] +name = "Thanks" +url = "http://www.apache.org/foundation/thanks.html" + +[[params.asf.links]] +name = "Security" +url = "http://www.apache.org/security/" + + +[params.links] +# End user relevant links. These will show up on left side of footer and in the community page if you have one. +[[params.links.user]] + name = "User mailing list" + url = "https://lists.apache.org/list.html?user@avro.apache.org" + icon = "fa fa-envelope" + desc = "Discussion and help from your fellow users" +[[params.links.user]] + name ="Twitter" + url = "https://twitter.com/ApacheAvro" + icon = "fab fa-twitter" + desc = "Follow us on Twitter to get the latest news!" +[[params.links.user]] + name = "Stack Overflow" + url = "https://stackoverflow.com/questions/tagged/avro" + icon = "fab fa-stack-overflow" + desc = "Practical questions and curated answers" +# Developer relevant links. These will show up on right side of footer and in the community page if you have one. +[[params.links.developer]] + name = "GitHub" + url = "https://github.com/apache/avro" + icon = "fab fa-github" + desc = "Development takes place here!" +[[params.links.developer]] + name = "Issues" + url = "https://issues.apache.org/jira/projects/AVRO/issues" + icon = "fab fa-jira" + desc = "Track bugs and new features" +[[params.links.developer]] + name = "Chat with other project developers at Slack" + url = "https://the-asf.slack.com/" + icon = "fab fa-slack" + desc = "Chat with other project developers at #avro channel" +[[params.links.developer]] + name = "Developer mailing list" + url = "https://lists.apache.org/list.html?dev@avro.apache.org" + icon = "fa fa-envelope" + desc = "Discuss development issues around the project" + +[[params.versions]] +version = "1.12.0" +url = "https://avro.apache.org/docs/1.12.0/" + +[[params.versions]] +version = "1.11.4" +url = "https://avro.apache.org/docs/1.11.4/" + +[[params.versions]] +version = "1.11.3" +url = "https://avro.apache.org/docs/1.11.3/" + +[[params.versions]] +version = "1.11.2" +url = "https://avro.apache.org/docs/1.11.2/" + +[[params.versions]] +version = "1.11.1" +url = "https://avro.apache.org/docs/1.11.1/" + +[[params.versions]] +version = "1.11.0" +url = "https://avro.apache.org/docs/1.11.0/" + +[[params.versions]] + version = "1.10.2" + url = "https://avro.apache.org/docs/1.10.2/" + +[[params.versions]] + version = "1.10.1" + url = "https://avro.apache.org/docs/1.10.1/" + +[[params.versions]] + version = "1.10.0" + url = "https://avro.apache.org/docs/1.10.0/" + +[[params.versions]] + version = "1.9.2" + url = "https://avro.apache.org/docs/1.9.2/" + +[[params.versions]] + version = "1.9.1" + url = "https://avro.apache.org/docs/1.9.1/" + +[[params.versions]] + version = "1.9.0" + url = "https://avro.apache.org/docs/1.9.0/" + +[[params.versions]] + version = "1.8.2" + url = "https://avro.apache.org/docs/1.8.2/" + +[[params.versions]] + version = "1.8.1" + url = "https://avro.apache.org/docs/1.8.1/" + +[[params.versions]] + version = "1.8.0" + url = "https://avro.apache.org/docs/1.8.0/" + +[[params.versions]] + version = "1.7.7" + url = "https://avro.apache.org/docs/1.7.7/" + +[[params.versions]] + version = "1.7.6" + url = "https://avro.apache.org/docs/1.7.6/" + +[[params.versions]] + version = "1.7.5" + url = "https://avro.apache.org/docs/1.7.5/" + +[[params.versions]] + version = "1.7.4" + url = "https://avro.apache.org/docs/1.7.4/" + +[[params.versions]] + version = "1.7.3" + url = "https://avro.apache.org/docs/1.7.3/" + +[[params.versions]] + version = "1.7.2" + url = "https://avro.apache.org/docs/1.7.2/" + +[[params.versions]] + version = "1.7.1" + url = "https://avro.apache.org/docs/1.7.1/" + +[[params.versions]] + version = "1.7.0" + url = "https://avro.apache.org/docs/1.7.0/" + +[[params.versions]] + version = "1.6.3" + url = "https://avro.apache.org/docs/1.6.3/" + +[[params.versions]] + version = "1.6.2" + url = "https://avro.apache.org/docs/1.6.2/" + +[[params.versions]] + version = "1.6.1" + url = "https://avro.apache.org/docs/1.6.1/" + +[[params.versions]] + version = "1.6.0" + url = "https://avro.apache.org/docs/1.6.0/" + +[[params.versions]] + version = "1.5.4" + url = "https://avro.apache.org/docs/1.5.4/" + +[[params.versions]] + version = "1.5.3" + url = "https://avro.apache.org/docs/1.5.3/" + +[[params.versions]] + version = "1.5.2" + url = "https://avro.apache.org/docs/1.5.2/" + +[[params.versions]] + version = "1.5.1" + url = "https://avro.apache.org/docs/1.5.1/" + +[[params.versions]] + version = "1.5.0" + url = "https://avro.apache.org/docs/1.5.0/" + +[[params.versions]] + version = "1.4.1" + url = "https://avro.apache.org/docs/1.4.1/" + +[[params.versions]] + version = "1.4.0" + url = "https://avro.apache.org/docs/1.4.0/" + +[[params.versions]] + version = "1.3.3" + url = "https://avro.apache.org/docs/1.3.3/" + +[[params.versions]] + version = "1.3.2" + url = "https://avro.apache.org/docs/1.3.2/" + +[[params.versions]] + version = "1.3.1" + url = "https://avro.apache.org/docs/1.3.1/" + +[[params.versions]] + version = "1.3.0" + url = "https://avro.apache.org/docs/1.3.0/" + +[[params.versions]] + version = "1.2.0" + url = "https://avro.apache.org/docs/1.2.0/" + +[[params.versions]] + version = "1.1.0" + url = "https://avro.apache.org/docs/1.1.0/" + +[[params.versions]] + version = "1.0.0" + url = "https://avro.apache.org/docs/1.0.0/" + +[module] + [module.hugoVersion] + extended = true + min = "0.110.0" + [[module.imports]] + path = "github.com/google/docsy" diff --git a/doc/content/en/_index.html b/doc/content/en/_index.html new file mode 100644 index 00000000000..57c806025ba --- /dev/null +++ b/doc/content/en/_index.html @@ -0,0 +1,72 @@ ++++ +title = "Apache Avro" +linkTitle = "Apache Avro" + ++++ + + + + +
+
+
+
+
+

Apache Avroâ„ĸ - a data serialization system

+ +
+
+
+
+
+ +{{% blocks/lead color="primary" %}} +Apache Avroâ„ĸ is the leading serialization format for record data, and first choice for streaming data pipelines. +It offers excellent schema evolution, and has implementations for the JVM (Java, Kotlin, Scala, …), Python, C/C++/C#, PHP, Ruby, +Rust, JavaScript, and even Perl. +{{% /blocks/lead %}} + +{{< blocks/section color="dark" type="features">}} + + +{{% blocks/feature icon="fab fa-java" title="Getting started with Java" url="/docs/++version++/getting-started-java" %}} +For Java / JVM users, find out everything you need to know about specifying a schema, (de)serializing Avro data and code generation. +{{% /blocks/feature %}} + +{{% blocks/feature icon="fab fa-python" title="Getting started with Python" url="/docs/++version++/getting-started-python" %}} +For Python users, find out everything you need to know about specifying a schema and (de)serializing Avro data. +{{% /blocks/feature %}} + +{{% blocks/feature icon="fad fa-comments" title="Join Our Community!" url="/community/" %}} +Learn from or connect with other users in our open and welcoming community. We'd love to hear from you! +{{% /blocks/feature %}} + +{{< /blocks/section >}} diff --git a/doc/content/en/_index.md b/doc/content/en/_index.md new file mode 100644 index 00000000000..ae6cc051fd9 --- /dev/null +++ b/doc/content/en/_index.md @@ -0,0 +1,40 @@ +--- +title: Apache Avro +--- + +{{< blocks/cover title="Apache Avroâ„ĸ " image_anchor="top" >}} + +Learn More + + +Download + +

a data serialization system

+{{< blocks/link-down color="info" >}} +{{< /blocks/cover >}} + + +{{% blocks/lead color="primary" %}} + +Apache Avroâ„ĸ is the leading serialization format for record data, and first choice for streaming data pipelines. It offers excellent schema evolution, and has implementations for the JVM (Java, Kotlin, Scala, â€Ļ), Python, C/C++/C#, PHP, Ruby, Rust, JavaScript, and even Perl. + +{{% /blocks/lead %}} + + +{{< blocks/section color="dark" type="row">}} + + +{{% blocks/feature icon="fab fa-java" title="Getting started with Java" url="/docs/++version++/getting-started-java" %}} +For Java / JVM users, find out everything you need to know about specifying a schema, (de)serializing Avro data and code generation. +{{% /blocks/feature %}} + +{{% blocks/feature icon="fab fa-python" title="Getting started with Python" url="/docs/++version++/getting-started-python" %}} +For Python users, find out everything you need to know about specifying a schema and (de)serializing Avro data. +{{% /blocks/feature %}} + +{{% blocks/feature icon="fad fa-comments" title="Join Our Community!" url="/community/" %}} +Learn from or connect with other users in our open and welcoming community. We'd love to hear from you! +{{% /blocks/feature %}} + +{{< /blocks/section >}} + diff --git a/doc/content/en/avro.rdf b/doc/content/en/avro.rdf new file mode 100644 index 00000000000..ba44ba09860 --- /dev/null +++ b/doc/content/en/avro.rdf @@ -0,0 +1,67 @@ + + + + + + 2011-01-11 + + Apache Avro + + + Apache Avro is a data serialization system. + Apache Avro is a data serialization system. + + + + C + C# + C++ + Java + JavaScript + Perl + PHP + Python + Ruby + Rust + + + + + + + + Avro 1.12.0 + 2024-08-05 + 1.12.0 + + + + + + + + + + diff --git a/doc/content/en/blog/_index.md b/doc/content/en/blog/_index.md new file mode 100644 index 00000000000..85f97bd211d --- /dev/null +++ b/doc/content/en/blog/_index.md @@ -0,0 +1,33 @@ +--- +title: "Blog" +linkTitle: "Blog" +menu: + main: + weight: 30 +--- + + + +This is the **blog** section. It has two categories: News and Releases. + +Files in these directories will be listed in reverse chronological order. + diff --git a/doc/content/en/blog/news/_index.md b/doc/content/en/blog/news/_index.md new file mode 100644 index 00000000000..243dcf5f4a2 --- /dev/null +++ b/doc/content/en/blog/news/_index.md @@ -0,0 +1,26 @@ +--- +title: "News About Apache Avro" +linkTitle: "News" +weight: 20 +--- + + diff --git a/doc/content/en/blog/news/avro-joins-apache.md b/doc/content/en/blog/news/avro-joins-apache.md new file mode 100755 index 00000000000..dbc1872644d --- /dev/null +++ b/doc/content/en/blog/news/avro-joins-apache.md @@ -0,0 +1,28 @@ +--- +title: "Avro joins Apache" +linkTitle: "Avro joins Apache" +date: 2009-04-10 +--- + + + +Avro has joined the Apache Software Foundation as a Hadoop subproject. diff --git a/doc/content/en/blog/news/new-committer-christophe-le-saec.md b/doc/content/en/blog/news/new-committer-christophe-le-saec.md new file mode 100755 index 00000000000..1522c1722b9 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-christophe-le-saec.md @@ -0,0 +1,41 @@ +--- +title: "New committer: Christophe Le Saec" +linkTitle: "New committer: Christophe Le Saec" +date: 2023-08-09 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited Christophe +Le Saec to become a committer and we are pleased to announce that +he has accepted. + +Christophe definitely puts in the work and, has an impressive breadth of +knowledge about the languages of the Avro SDK! + +As an ASF project, we tend to be very conservative about making changes, and +Christophe brings in fresh ideas and very quickly proposes concrete +implementations to prove them. He has a good understanding of Avro, the +motivation to move things forward, and the expertise to make changes! At the +same time, he's easy to talk to and flexible in coming to a consensus. + +Thanks for all your hard work! diff --git a/doc/content/en/blog/news/new-committer-david-mollitor.md b/doc/content/en/blog/news/new-committer-david-mollitor.md new file mode 100755 index 00000000000..eb793009466 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-david-mollitor.md @@ -0,0 +1,41 @@ +--- +title: "New committer: David Mollitor" +linkTitle: "New committer: David Mollitor" +date: 2021-10-05 +--- + + + +The Project Management Committee (PMC) for Apache Avro +has invited David Mollitor to become a committer and we are pleased +to announce that he has accepted. + +Since 2017, David has raised and fixed many issues in the +Java SDK. Recently he's been finding and providing fixes for subtle +performance issues. His work is always high-quality and he is +reactive and pleasant to talk with on code reviews and JIRA. + +Being a committer enables easier contribution to the +project since there is no need to go via the patch +submission process. This should enable better productivity. + +It's great to have you as part of the team, David! diff --git a/doc/content/en/blog/news/new-committer-martin-grigorov.md b/doc/content/en/blog/news/new-committer-martin-grigorov.md new file mode 100755 index 00000000000..78cc3b61648 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-martin-grigorov.md @@ -0,0 +1,41 @@ +--- +title: "New committer: Martin Grigorov" +linkTitle: "New committer: Martin Grigorov" +date: 2022-01-04 +--- + + + +The Project Management Committee (PMC) for Apache Avro +has invited Martin Grigorov to become a committer and we are pleased +to announce that he has accepted. + +Over the last few months, he has been active, reliable and easy to +work with on PRs and on the mailing list. His work is of high +quality, and he has a breadth of experience in many of the SDK languages. +I'm especially keen to point out the work he's been doing on the website! + +Being a committer enables easier contribution to the +project since there is no need to go via the patch +submission process. This should enable better productivity. + +It's great to have you as part of the team, Martin! diff --git a/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md b/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md new file mode 100755 index 00000000000..535a2d88185 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md @@ -0,0 +1,41 @@ +--- +title: "New committer: Oscar Westra van Holthe - Kind" +linkTitle: "New committer: Oscar Westra van Holthe - Kind" +date: 2023-08-09 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited Oscar +Westra van Holthe - Kind to become a committer and we are pleased to announce that +he has accepted. + +Oscar has done some really solid work on the IDL and JavaCC parts of the Java +SDK. We trust his work and think it's exceptionally high quality. From the +start, he has already doing much of the work of a committer, demonstrated by +his continuous presence in commenting JIRA, reviewing PRs as well as +encouraging and insightful words on the mailing list. + +As a bonus, in his spare time, Oscar also maintains the IntelliJ plugin for +[IDL support](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support)! + +Thanks for all your hard work, and welcome! diff --git a/doc/content/en/blog/news/new-committer-zoltan-csizmadia.md b/doc/content/en/blog/news/new-committer-zoltan-csizmadia.md new file mode 100755 index 00000000000..42834c551c5 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-zoltan-csizmadia.md @@ -0,0 +1,47 @@ +--- +title: "New committer: Zoltan Csizmadia" +linkTitle: "New committer: Zoltan Csizmadia" +date: 2022-03-29 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited +Zoltan Csizmadia to become a committer and we are pleased to announce +that he has accepted. + +Zoltan has been present in the C# SDK for over two years and has +really increased his activity in maintaining this language in the last +few months. He knows the technology, but more importantly, he is +patient and works well with those of us who rely on the expertise of +others. Recently, he has been engaging with other contributors to +increase the maintainability and quality of the dotnet code, and we +have confidence in his decisions to balance stability of the +established code and the expectations of modern C# developers. + +Being a committer enables easier contribution to the project since +there is no need to go via the patch submission process. This should +enable better productivity. + +Please join me in congratulating Zoltan on his recognition of great +work thus far in our community. + diff --git a/doc/content/en/blog/news/new-pmc-martin-grigorov.md b/doc/content/en/blog/news/new-pmc-martin-grigorov.md new file mode 100755 index 00000000000..659bdb30d42 --- /dev/null +++ b/doc/content/en/blog/news/new-pmc-martin-grigorov.md @@ -0,0 +1,30 @@ +--- +title: "New PMC member: Martin Grigorov" +linkTitle: "New PMC member: Martin Grigorov" +date: 2022-09-13 +--- + + + +The Project Management Committee (PMC) for Apache Avro is pleased to announce that Martin Grigorov has accepted our invitation to become a PMC member. He has has been active, reliable and responsive to the community and a solid contributor to various SDKs, bringing well-thought out reviews and comments to both old and new PRs and JIRA. He definitely stepped up for the website refactoring and preparing for the 1.11.1 release! + +Please join me in welcoming Martin to the Avro PMC! diff --git a/doc/content/en/blog/news/new-pmc-michael-a-smith.md b/doc/content/en/blog/news/new-pmc-michael-a-smith.md new file mode 100755 index 00000000000..2d203128eca --- /dev/null +++ b/doc/content/en/blog/news/new-pmc-michael-a-smith.md @@ -0,0 +1,34 @@ +--- +title: "New PMC member: Michael A. Smith" +linkTitle: "New PMC member: Michael A. Smith" +date: 2023-08-09 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited Michael A. +Smith to the PMC and we are pleased to announce that he has accepted. + +Notably, Michael has taken a leadership role in ensuring the quality of the +Python SDK, lending his expertise to ensure that Avro has a place in the +python community, while keeping our implementation up-to-date with standards +and modern versions. It's not an easy task, and we appreciate all he does! diff --git a/doc/content/en/blog/news/new-project-logo.md b/doc/content/en/blog/news/new-project-logo.md new file mode 100644 index 00000000000..24f886912da --- /dev/null +++ b/doc/content/en/blog/news/new-project-logo.md @@ -0,0 +1,50 @@ +--- +title: "New Project Logo" +linkTitle: "New Project Logo" +date: 2023-11-21 +--- + + + +The Apache Avro project has a new project logo! + +The old logo was derived from the logo of a (now defunct) aircraft manufacturer +in Great Britain. This posed a risk, as the Apache foundation would not contest +legal action (even if extremely unlikely). + +But thanks to Emma Kellam, we now have a new logo! She has made several logo +designs, and after some debate and several votes (it was a close call!), we can +announce the new logo: + +[//]: # (the logo scales to 100% high or all available width, so limit it) +

+{{< project_logo >}} +

+ +The new logo is an homage to the previous logo, which is also triangular and +uses blue colours. The paper airplane embodies keywords like 'fast', 'small' +and 'efficient'. The blobby tail left behind by the airplane makes the icon +unique and embodies 'flow' and 'transformation'. + +All in all a very nice logo for Apache Avro, which embodies the same keywords. + +Thanks for all your hard work Emma, and welcome! diff --git a/doc/content/en/blog/releases/_index.md b/doc/content/en/blog/releases/_index.md new file mode 100644 index 00000000000..55875726bd8 --- /dev/null +++ b/doc/content/en/blog/releases/_index.md @@ -0,0 +1,28 @@ +--- +title: "Releases" +linkTitle: "Releases" +weight: 20 +aliases: +- /releases.html +--- + + diff --git a/doc/content/en/blog/releases/avro-1.0.0-released.md b/doc/content/en/blog/releases/avro-1.0.0-released.md new file mode 100755 index 00000000000..1bf74fbbb0f --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.0.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.0.0" +linkTitle: "Avro 1.0.0" +date: 2010-07-15 +--- + + + +The first release of Avro is now available. To download, it, use the "Download" link below. diff --git a/doc/content/en/blog/releases/avro-1.1.0-released.md b/doc/content/en/blog/releases/avro-1.1.0-released.md new file mode 100755 index 00000000000..f8876219ced --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.1.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.1.0" +linkTitle: "Avro 1.1.0" +date: 2009-09-15 +--- + + + +Apache Avro 1.1.0 is now available! diff --git a/doc/content/en/blog/releases/avro-1.10.0-released.md b/doc/content/en/blog/releases/avro-1.10.0-released.md new file mode 100755 index 00000000000..d7a41f5e39d --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.10.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.10.0" +linkTitle: "Avro 1.10.0" +date: 2020-06-29 +--- + + + +Apache Avro 1.10.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.10.1-released.md b/doc/content/en/blog/releases/avro-1.10.1-released.md new file mode 100755 index 00000000000..b8e6b199bd2 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.10.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.10.1" +linkTitle: "Avro 1.10.1" +date: 2020-12-03 +--- + + + +Apache Avro 1.10.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.10.2-released.md b/doc/content/en/blog/releases/avro-1.10.2-released.md new file mode 100755 index 00000000000..ea1a74a7256 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.10.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.10.2" +linkTitle: "Avro 1.10.2" +date: 2021-03-15 +--- + + + +Apache Avro 1.10.2 has been released! diff --git a/doc/content/en/blog/releases/avro-1.11.0-released.md b/doc/content/en/blog/releases/avro-1.11.0-released.md new file mode 100755 index 00000000000..54fd8b7293c --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.0-released.md @@ -0,0 +1,70 @@ +--- +title: "Avro 1.11.0" +linkTitle: "Avro 1.11.0" +date: 2021-10-31 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.0! + +All signed release artifacts, signatures and verification instructions can +be found }}">here + +This release includes 120 Jira issues, including some interesting features: + +* Specification: AVRO-3212 Support documentation tags for FIXED types +* C#: AVRO-2961 Support dotnet framework 5.0 +* C#: AVRO-3225 Prevent memory errors when deserializing untrusted data +* C++: AVRO-2923 Logical type corrections +* Java: AVRO-2863 Support Avro core on android +* Javascript: AVRO-3131 Drop support for node.js 10 +* Perl: AVRO-3190 Fix error when reading from EOF +* Python: AVRO-2906 Improved performance validating deep record data +* Python: AVRO-2914 Drop Python 2 support +* Python: AVRO-3004 Drop Python 3.5 support +* Ruby: AVRO-3108 Drop Ruby 2.5 support + +For the first time, the 1.11.0 release includes experimental support for +**Rust**. Work is continuing on this donated SDK, but we have not versioned and +published official artifacts for this release. + +**Python**: The avro package fully supports Python 3. We will no longer publish a +separate avro-python3 package + +And of course upgraded dependencies to latest versions, CVE fixes and more: +https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.0 + +The link to all fixed JIRA issues and a brief summary can be found at: +https://github.com/apache/avro/releases/tag/release-1.11.0 + +In addition, language-specific release artifacts are available: + +* C#: https://www.nuget.org/packages/Apache.Avro/1.11.0 +* Java: from Maven Central, +* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.0 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.11.0 +* Ruby: https://rubygems.org/gems/avro/versions/1.11.0 + +Thanks to everyone for contributing! + diff --git a/doc/content/en/blog/releases/avro-1.11.1-released.md b/doc/content/en/blog/releases/avro-1.11.1-released.md new file mode 100755 index 00000000000..d78d172930d --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.1-released.md @@ -0,0 +1,130 @@ +--- +title: "Avro 1.11.1" +linkTitle: "Avro 1.11.1" +date: 2022-07-31 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.1! + +All signed release artifacts, signatures and verification instructions can +be found }}">here + +## Most interesting + +This release includes 256 Jira issues, including some interesting features: + +Avro specification +- [AVRO-3436](https://issues.apache.org/jira/browse/AVRO-3436) Clarify which names are allowed to be qualified with namespaces +- [AVRO-3370](https://issues.apache.org/jira/browse/AVRO-3370) Inconsistent behaviour on types as invalid names +- [AVRO-3275](https://issues.apache.org/jira/browse/AVRO-3275) Clarify how fullnames are created, with example +- [AVRO-3257](https://issues.apache.org/jira/browse/AVRO-3257) IDL: add syntax to create optional fields +- [AVRO-2019](https://issues.apache.org/jira/browse/AVRO-2019) Improve docs for logical type annotation + +C++ +- [AVRO-2722](https://issues.apache.org/jira/browse/AVRO-2722) Use of boost::mt19937 is not thread safe + +C# +- [AVRO-3383](https://issues.apache.org/jira/browse/AVRO-3383) Many completed subtasks for modernizing C# coding style +- [AVRO-3481](https://issues.apache.org/jira/browse/AVRO-3481) Input and output variable type mismatch +- [AVRO-3475](https://issues.apache.org/jira/browse/AVRO-3475) Enforce time-millis and time-micros specification +- [AVRO-3469](https://issues.apache.org/jira/browse/AVRO-3469) Build and test using .NET SDK 7.0 +- [AVRO-3468](https://issues.apache.org/jira/browse/AVRO-3468) Default values for logical types not supported +- [AVRO-3467](https://issues.apache.org/jira/browse/AVRO-3467) Use oracle-actions to test with Early Access JDKs +- [AVRO-3453](https://issues.apache.org/jira/browse/AVRO-3453) Avrogen Add Generated Code Attribute +- [AVRO-3432](https://issues.apache.org/jira/browse/AVRO-3432) Add command line option to skip creation of directories +- [AVRO-3411](https://issues.apache.org/jira/browse/AVRO-3411) Add Visual Studio Code Devcontainer support +- [AVRO-3388](https://issues.apache.org/jira/browse/AVRO-3388) Implement extra codecs for C# as seperate nuget packages +- [AVRO-3265](https://issues.apache.org/jira/browse/AVRO-3265) avrogen generates uncompilable code when namespace ends +with ".Avro" +- [AVRO-3219](https://issues.apache.org/jira/browse/AVRO-3219) Support nullable enum type fields + +Java +- [AVRO-3531](https://issues.apache.org/jira/browse/AVRO-3531) GenericDatumReader in multithread lead to infinite loop +- [AVRO-3482](https://issues.apache.org/jira/browse/AVRO-3482) Reuse MAGIC in DataFileReader +- [AVRO-3586](https://issues.apache.org/jira/browse/AVRO-3586) Make Avro Build Reproducible +- [AVRO-3441](https://issues.apache.org/jira/browse/AVRO-3441) Automatically register LogicalTypeFactory classes +- [AVRO-3375](https://issues.apache.org/jira/browse/AVRO-3375) Add union branch, array index and map key "path" +information to serialization errors +- [AVRO-3374](https://issues.apache.org/jira/browse/AVRO-3374) Fully qualified type reference "ns.int" loses namespace +- [AVRO-3294](https://issues.apache.org/jira/browse/AVRO-3294) IDL parsing allows doc comments in strange places +- [AVRO-3273](https://issues.apache.org/jira/browse/AVRO-3273) avro-maven-plugin breaks on old versions of Maven +- [AVRO-3266](https://issues.apache.org/jira/browse/AVRO-3266) Output stream incompatible with MagicS3GuardCommitter +- [AVRO-3243](https://issues.apache.org/jira/browse/AVRO-3243) Lock conflicts when using computeIfAbsent +- [AVRO-3120](https://issues.apache.org/jira/browse/AVRO-3120) Support Next Java LTS (Java 17) +- [AVRO-2498](https://issues.apache.org/jira/browse/AVRO-2498) UUID generation is not working + +Javascript +- [AVRO-3489](https://issues.apache.org/jira/browse/AVRO-3489) Replace istanbul with nyc for code coverage +- [AVRO-3322](https://issues.apache.org/jira/browse/AVRO-3322) Buffer is not defined in browser environment +- [AVRO-3084](https://issues.apache.org/jira/browse/AVRO-3084) Fix JavaScript interop test to work with other languages on CI + +Perl +- [AVRO-3263](https://issues.apache.org/jira/browse/AVRO-3263) Schema validation warning on invalid schema with a long field + +Python +- [AVRO-3542](https://issues.apache.org/jira/browse/AVRO-3542) Scale assignment optimization +- [AVRO-3521](https://issues.apache.org/jira/browse/AVRO-3521) "Scale" property from decimal object +- [AVRO-3380](https://issues.apache.org/jira/browse/AVRO-3380) Byte reading in avro.io does not assert bytes read +- [AVRO-3229](https://issues.apache.org/jira/browse/AVRO-3229) validate the default value of an enum field +- [AVRO-3218](https://issues.apache.org/jira/browse/AVRO-3218) Pass LogicalType to BytesDecimalSchema + +Ruby +- [AVRO-3277](https://issues.apache.org/jira/browse/AVRO-3277) Test against Ruby 3.1 + +Rust +- [AVRO-3558](https://issues.apache.org/jira/browse/AVRO-3558) Add a demo crate that shows usage as WebAssembly +- [AVRO-3526](https://issues.apache.org/jira/browse/AVRO-3526) Improve resolving Bytes and Fixed from string +- [AVRO-3506](https://issues.apache.org/jira/browse/AVRO-3506) Implement Single Object Writer +- [AVRO-3507](https://issues.apache.org/jira/browse/AVRO-3507) Implement Single Object Reader +- [AVRO-3405](https://issues.apache.org/jira/browse/AVRO-3405) Add API for user-provided metadata to file +- [AVRO-3339](https://issues.apache.org/jira/browse/AVRO-3339) Rename crate from avro-rs to apache-avro +- [AVRO-3479](https://issues.apache.org/jira/browse/AVRO-3479) Derive Avro Schema macro + +Website +- [AVRO-2175](https://issues.apache.org/jira/browse/AVRO-2175) Website refactor +- [AVRO-3450](https://issues.apache.org/jira/browse/AVRO-3450) Document IDL support in IDEs + + +## Rust + +This is the first release that provides the `apache-avro` crate at [crates.io](https://crates.io/crates/apache-avro)! + +## JIRA + +A list of all JIRA tickets fixed in 1.11.1 could be found [here](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.1) + +## Language repositories + +In addition, language-specific release artifacts are available: + +* C#: https://www.nuget.org/packages/Apache.Avro/1.11.1 +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.1/ +* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.1 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.11.1 +* Ruby: https://rubygems.org/gems/avro/versions/1.11.1 +* Rust: https://crates.io/crates/apache-avro/0.14.0 + +Thanks to everyone for contributing! + diff --git a/doc/content/en/blog/releases/avro-1.11.2-released.md b/doc/content/en/blog/releases/avro-1.11.2-released.md new file mode 100755 index 00000000000..3949d5f52ed --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.2-released.md @@ -0,0 +1,98 @@ +--- +title: "Avro 1.11.2" +linkTitle: "Avro 1.11.2" +date: 2023-07-03 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.2! + +All signed release artifacts, signatures and verification instructions can +be found }}">here + +This release addresses 89 [Avro JIRA](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.2). + +## Highlights + +C# +- [AVRO-3434](https://issues.apache.org/jira/browse/AVRO-3434): Support logical schemas in reflect reader and writer +- [AVRO-3670](https://issues.apache.org/jira/browse/AVRO-3670): Add NET 7.0 support +- [AVRO-3724](https://issues.apache.org/jira/browse/AVRO-3724): Fix C# JsonEncoder for nested array of records +- [AVRO-3756](https://issues.apache.org/jira/browse/AVRO-3756): Add a method to return types instead of writing them to disk + +C++ +- [AVRO-3601](https://issues.apache.org/jira/browse/AVRO-3601): C++ API header contains breaking include +- [AVRO-3705](https://issues.apache.org/jira/browse/AVRO-3705): C++17 support + +Java +- [AVRO-2943](https://issues.apache.org/jira/browse/AVRO-2943): Add new GenericData String/Utf8 ARRAY comparison test +- [AVRO-2943](https://issues.apache.org/jira/browse/AVRO-2943): improve GenericRecord MAP type comparison +- [AVRO-3473](https://issues.apache.org/jira/browse/AVRO-3473): Use ServiceLoader to discover Conversion +- [AVRO-3536](https://issues.apache.org/jira/browse/AVRO-3536): Inherit conversions for Union type +- [AVRO-3597](https://issues.apache.org/jira/browse/AVRO-3597): Allow custom readers to override string creation +- [AVRO-3560](https://issues.apache.org/jira/browse/AVRO-3560): Throw SchemaParseException on dangling content beyond end of schema +- [AVRO-3602](https://issues.apache.org/jira/browse/AVRO-3602): Support Map(with non-String keys) and Set in ReflectDatumReader +- [AVRO-3676](https://issues.apache.org/jira/browse/AVRO-3676): Produce valid toString() for UUID JSON +- [AVRO-3698](https://issues.apache.org/jira/browse/AVRO-3698): SpecificData.getClassName must replace reserved words +- [AVRO-3700](https://issues.apache.org/jira/browse/AVRO-3700): Publish Java SBOM artifacts with CycloneDX +- [AVRO-3783](https://issues.apache.org/jira/browse/AVRO-3783): Read LONG length for bytes, only allow INT sizes +- [AVRO-3706](https://issues.apache.org/jira/browse/AVRO-3706): accept space in folder name + +Python +- [AVRO-3761](https://issues.apache.org/jira/browse/AVRO-3761): Fix broken validation of nullable UUID field +- [AVRO-3229](https://issues.apache.org/jira/browse/AVRO-3229): Raise on invalid enum default only if validation enabled +- [AVRO-3622](https://issues.apache.org/jira/browse/AVRO-3622): Fix compatibility check for schemas having or missing namespace +- [AVRO-3669](https://issues.apache.org/jira/browse/AVRO-3669): Add py.typed marker file (PEP561 compliance) +- [AVRO-3672](https://issues.apache.org/jira/browse/AVRO-3672): Add CI testing for Python 3.11 +- [AVRO-3680](https://issues.apache.org/jira/browse/AVRO-3680): allow to disable name validation + +Ruby +- [AVRO-3775](https://issues.apache.org/jira/browse/AVRO-3775): Fix decoded default value of logical type +- [AVRO-3697](https://issues.apache.org/jira/browse/AVRO-3697): Test against Ruby 3.2 +- [AVRO-3722](https://issues.apache.org/jira/browse/AVRO-3722): Eagerly initialize instance variables for better inline cache hits + +Rust +- Many, many bug fixes and implementation progress in this experimental SDK. +- Rust CI builds and lints are passing, and has been released to crates.io as version 0.15.0 + +In addition: +- Upgrade dependencies to latest versions, including CVE fixes. +- Testing and build improvements. +- Performance fixes, other bug fixes, better documentation and more... + + +Known issues +- [AVRO-3789](https://issues.apache.org/jira/browse/AVRO-3789) Java: Problem when comparing empty MAP types. + +## Language SDK / Convenience artifacts + +* C#: https://www.nuget.org/packages/Apache.Avro/1.11.2 +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.2/ +* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.2 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.11.2 +* Ruby: https://rubygems.org/gems/avro/versions/1.11.2 +* Rust: https://crates.io/crates/apache-avro/0.15.0 + +Thanks to everyone for contributing! + diff --git a/doc/content/en/blog/releases/avro-1.11.3-released.md b/doc/content/en/blog/releases/avro-1.11.3-released.md new file mode 100755 index 00000000000..50a0eef3fcf --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.3-released.md @@ -0,0 +1,79 @@ +--- +title: "Avro 1.11.3" +linkTitle: "Avro 1.11.3" +date: 2023-09-22 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.3! + +All signed release artifacts, signatures and verification instructions can +be found }}">here + +This release [addresses 39 Jira issues](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.3). + +## Highlights + +Java +- [AVRO-3789](https://issues.apache.org/jira/browse/AVRO-3789): Comparing maps in GenericData is wrong for certain combinations and fails for empty maps +- [AVRO-3713](https://issues.apache.org/jira/browse/AVRO-3713): Thread scalability problem with the use of SynchronizedMap +- [AVRO-3486](https://issues.apache.org/jira/browse/AVRO-3486): Protocol namespace not parsed correctly if protocol is defined by full name +- [AVRO-2771](https://issues.apache.org/jira/browse/AVRO-2771): Allow having Error in a Record +- [AVRO-3819](https://issues.apache.org/jira/browse/AVRO-3819): Rationalize the system properties that limit allocation + +Python +- [AVRO-3819](https://issues.apache.org/jira/browse/AVRO-3819): Rationalize the system properties that limit allocation +- [AVRO-312](https://issues.apache.org/jira/browse/AVRO-312): Generate documentation for Python with Sphinx + +Rust +- [AVRO-3853](https://issues.apache.org/jira/browse/AVRO-3853): Support local-timestamp logical types for the Rust SDK +- [AVRO-3851](https://issues.apache.org/jira/browse/AVRO-3851): Validate default value for record fields and enums on parsing +- [AVRO-3847](https://issues.apache.org/jira/browse/AVRO-3847): Record field doesn't accept default value if field type is union and the type of default value is pre-defined name +- [AVRO-3846](https://issues.apache.org/jira/browse/AVRO-3846): Race condition can happen among serde tests +- [AVRO-3838](https://issues.apache.org/jira/browse/AVRO-3838): Replace regex crate with regex-lite +- [AVRO-3837](https://issues.apache.org/jira/browse/AVRO-3837): Disallow invalid namespaces for the Rust binding +- [AVRO-3835](https://issues.apache.org/jira/browse/AVRO-3835): Get rid of byteorder and zerocopy dependencies +- [AVRO-3830](https://issues.apache.org/jira/browse/AVRO-3830): Handle namespace properly if a name starts with dot +- [AVRO-3827](https://issues.apache.org/jira/browse/AVRO-3827): Disallow duplicate field names +- [AVRO-3787](https://issues.apache.org/jira/browse/AVRO-3787): Deserialization fails to use default if an enum in a record in a union is given an unknown symbol +- [AVRO-3786](https://issues.apache.org/jira/browse/AVRO-3786): Deserialization results in FindUnionVariant error if the writer and reader have the same symbol but at different positions +- + +In addition: +- Upgrade dependencies to latest versions, including CVE fixes. +- Testing and build improvements. +- Performance fixes, other bug fixes, better documentation and more. + +Known issues: ∅ + +## Language SDK / Convenience artifacts + +* C#: https://www.nuget.org/packages/Apache.Avro/1.11.3 +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.3/ +* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.3 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.11.3 +* Ruby: https://rubygems.org/gems/avro/versions/1.11.3 +* Rust: https://crates.io/crates/apache-avro/0.16.0 + +Thanks to everyone for contributing! diff --git a/doc/content/en/blog/releases/avro-1.11.4-released.md b/doc/content/en/blog/releases/avro-1.11.4-released.md new file mode 100755 index 00000000000..c5f36146dbd --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.4-released.md @@ -0,0 +1,49 @@ +--- +title: "Avro 1.11.4" +linkTitle: "Avro 1.11.4" +date: 2024-09-22 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.4! + +All signed release artifacts, signatures and verification instructions can +be found }}">here + +This release [addresses 4 Jira issues](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.4) +only in the Java SDK. All other SDKs have no difference to their 1.12.0 release, so please use 1.12.0 for them! + +## Highlights + +Java +- [AVRO-3985](https://issues.apache.org/jira/browse/AVRO-3985): Restrict trusted packages in ReflectData and SpecificData +- [AVRO-3989](https://issues.apache.org/jira/browse/AVRO-3989): Maven Plugin Always Recompiles IDL Files +- [AVRO-3880](https://issues.apache.org/jira/browse/AVRO-3880): Upgrade maven-antrun-plugin to 3.1.0 +- [AVRO-3748](https://issues.apache.org/jira/browse/AVRO-3748): issue with DataFileSeekableInput.SeekableInputStream.skip + + +## Language SDK / Convenience artifacts + +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.4/ + +Thanks to everyone for contributing! diff --git a/doc/content/en/blog/releases/avro-1.11.5-released.md b/doc/content/en/blog/releases/avro-1.11.5-released.md new file mode 100755 index 00000000000..0fef4dd238d --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.5-released.md @@ -0,0 +1,48 @@ +--- +title: "Avro 1.11.5" +linkTitle: "Avro 1.11.5" +date: 2025-10-16 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.5! + +All signed release artifacts, signatures and verification instructions can +be found }}">here + +## Security Fixes + +This release addresses 4 security fixes: +* Prevent class with empty Java package being trusted by SpecificDatumReader ([#3311](https://github.com/apache/avro/pull/3311)) +* Remove the default serializable packages and deprecated the property to introduce org.apache.avro.SERIALIZABLE_CLASSES instead ([#3376](https://github.com/apache/avro/pull/3376)) +* java-[key-]class allowed packages must be packages ([#3453](https://github.com/apache/avro/pull/3453)) +* [AVRO-4053](https://issues.apache.org/jira/browse/AVRO-4053): doc consistency in velocity templates ([#3150](https://github.com/apache/avro/pull/3150)) + +These fixes apply only to the Java SDK. All other SDKs have no difference with their 1.11.4 release. + + +## Language SDK / Convenience artifacts + +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.5/ + +Thanks to everyone for contributing! diff --git a/doc/content/en/blog/releases/avro-1.12.0-released.md b/doc/content/en/blog/releases/avro-1.12.0-released.md new file mode 100755 index 00000000000..d4703858ebd --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.12.0-released.md @@ -0,0 +1,538 @@ +--- +title: "Avro 1.12.0" +linkTitle: "Avro 1.12.0" +date: 2024-08-05 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.12.0! + +All signed release artifacts, signatures and verification instructions can be found }}">here + +## Changes + +### Sub-task + +- [AVRO-3122]: TestAvroKeyOutputFormat and other avro-mapred tests fail with Java 17 +- [AVRO-3308]: Include a curated list of resources +- [AVRO-3384]: Define C# Coding Style Guidelines +- [AVRO-3449]: Add an onboarding guide for contributors, committers and PMC +- [AVRO-3458]: Add test coverage for GenericRecord +- [AVRO-3488]: Fix Spelling Mistakes +- [AVRO-3490]: Fix IDE0016 Use throw expression +- [AVRO-3491]: Fix IDE0020 Use pattern matching to avoid 'is' check followed by a cast +- [AVRO-3497]: Fix IDE0075 Simplify conditional expression +- [AVRO-3499]: Fix IDE0079 Remove unnecessary suppression +- [AVRO-3538]: Improve the contributions page +- [AVRO-3700]: Publish Java SBOM artifacts with CycloneDX +- [AVRO-3813]: Use list of primitiv +- [AVRO-3826]: Commons test for C++ module +- [AVRO-3916]: Add nanos support for the Rust SDK +- [AVRO-3926]: [Rust] Allow UUID to serialize to Fixed[16] + +### Bug fixes + +- [AVRO-265]: Protocol namespace always written out in toJson +- [AVRO-1318]: Python schema should store fingerprints +- [AVRO-1463]: Undefined values cause warnings when unions with null serialized +- [AVRO-1517]: Unicode strings are accepted as bytes and fixed type by perl API +- [AVRO-1521]: Inconsistent behavior of Perl API with 'boolean' type +- [AVRO-1523]: Perl API: int/long type minimum value checks are off by one +- [AVRO-1737]: Unhashable type: 'RecordSchema' +- [AVRO-1830]: Avro-Perl DataFileReader chokes when avro.codec is absent +- [AVRO-2254]: Unions with 2 records declared downward fail +- [AVRO-2284]: Incorrect EnumSymbol initialization in TestReadingWritingDataInEvolvedSchemas.java +- [AVRO-2498]: UUID generation is not working avro 1.9 version +- [AVRO-2598]: C++ standard of library implies C++ standard of projects using Avro +- [AVRO-2722]: impl/DataFile.cc use of boost::mt19937 for DataFileWriteBase::makeSync is not thread safe +- [AVRO-2771]: Java 1.9.X doesn't allow having Error in a Record +- [AVRO-2862]: C# Primitive Schema losing metadata +- [AVRO-2883]: Avrogen (csharp) namespace mapping missing for references +- [AVRO-2885]: Providing a decimal number in an int field doesn't return an error +- [AVRO-2943]: Map comparison between Utf8 and String keys fails +- [AVRO-2987]: pkg-config has a broken `Requires:` section +- [AVRO-3003]: c# apache avro codegen - default value for enum types are not setting up properly +- [AVRO-3133]: EnumAdjust.resolve should compare unqualified name rather than full name +- [AVRO-3216]: Rust: failure reading multiple use of named schemas in file +- [AVRO-3232]: Rust deserializer: add missing matches to deserialize_any union and string/map +- [AVRO-3234]: Rust: Add new codec: zstandard +- [AVRO-3240]: Schema deserialization is not backwards compatible +- [AVRO-3259]: When opening an avro file which is encoded with anything besides none and deflate, it defaults to none and then returns garbage. +- [AVRO-3273]: [Java] avro-maven-plugin breaks on old versions of Maven +- [AVRO-3316]: [Rust] build breaks in docker build +- [AVRO-3322]: JavaScript: Buffer is not defined in browser environment +- [AVRO-3331]: Rust: Cannot extract Decimal value +- [AVRO-3350]: Validate that Default value is found in Enum +- [AVRO-3386]: [PHP] Build failing on github and travis +- [AVRO-3410]: [Rust] lint failure +- [AVRO-3433]: Rust: The canonical form should preserve schema references +- [AVRO-3448]: Rust: Encoding Panic with valid schema and input +- [AVRO-3452]: [rust] Derive Deserialize produces invalid Name struct +- [AVRO-3460]: [rust] Value::validate does not validate against Schema Refs +- [AVRO-3461]: [rust] Resolution Flow does not handle schema Refs +- [AVRO-3466]: Rust: serialize Schema to JSON loses inner namespace names +- [AVRO-3468]: Default values for logical types not supported +- [AVRO-3471]: Microseconds logical types are rounded to milliseconds +- [AVRO-3481]: Input and output variable type mismatch +- [AVRO-3482]: DataFileReader should reuse MAGIC data read from inputstream +- [AVRO-3486]: Protocol namespace not parsed correctly if protocol is defined by full name +- [AVRO-3495]: Rust: Record serialization is sensitive to order of fields in struct +- [AVRO-3511]: Rust: Fix the parsing of decimal logical type +- [AVRO-3516]: [rust] Avro Derive not working outside of repo context +- [AVRO-3529]: [Rust][branch-1.11] Cargo.toml is a virtual manifest, requires actual package +- [AVRO-3534]: Rust: Use dependency-review-action only for pull_request events +- [AVRO-3536]: Union type not inheriting type conversions +- [AVRO-3549]: [rust] Avro reader fails if it tries to read data compressed with codec that is not enabled in features +- [AVRO-3560]: avro ignores input after end of avsc json +- [AVRO-3568]: C# ToParsingForm normalizes logical type to "logical" rather than base type +- [AVRO-3581]: Usage of deprecated configuration properties in Velocity +- [AVRO-3585]: Unable to encode Value::String as Schema::UUID +- [AVRO-3587]: C: Fix possible heap-buffer-overflow in avro::DataFileReaderBase::readDataBlock() +- [AVRO-3595]: Release Notes missing for 1.11.1 +- [AVRO-3597]: Recent changes in GenericDatumReader.java break compatibility +- [AVRO-3601]: C++ API header contains breaking include +- [AVRO-3612]: Report specific location of incompatibility in record schema +- [AVRO-3613]: Unions cannot have more than one logical type in C# +- [AVRO-3617]: [C++] Integer overflow risks with Validator::count_ and Validator::counters_ +- [AVRO-3618]: [Java] TestBinaryDecoder should check consistency with directBinaryDecoder +- [AVRO-3619]: [Java] TestBinaryDecoder should check consistency with directBinaryDecoder +- [AVRO-3622]: Python compatibility check fails if record with and without namespace are compared +- [AVRO-3625]: [Rust] UnionSchema.is_nullable() should return true if any of the variants is Schema::Null +- [AVRO-3631]: Fix serialization of structs containing Fixed fields +- [AVRO-3632]: Union defaults are not handled as per the specification +- [AVRO-3642]: GenericSingleObjectReader::read_value fails on non-exhaustive read +- [AVRO-3645]: Fix deserialization of enum with unit () type +- [AVRO-3650]: Fix C++ Build on Manjaro +- [AVRO-3656]: Vulnerabilities from dependencies - jackson-databind & commons-text +- [AVRO-3657]: Computation of initial buffer size in OutputBuffer makes no sense +- [AVRO-3659]: Typo in python example +- [AVRO-3662]: [Ruby] Ruby 2.6 CI workflow fails since a while +- [AVRO-3663]: rust crate apache_avro_derive creates invalid schemas for raw identifiers +- [AVRO-3667]: [Python] Python 3.10 CI test fails since a while +- [AVRO-3669]: Missing py.typed file +- [AVRO-3674]: Value::Record containing enums fail to validate when using namespaces in Schema +- [AVRO-3683]: Rust Writer, Reader can't use Schemas with dependencies in other Schemas. i.e. The output of Schema::parse_list +- [AVRO-3687]: Rust enum missing default +- [AVRO-3688]: Schema resolution panics when a custom record field is included multiple times +- [AVRO-3698]: [Java] SpecificData.getClassName must replace reserved words +- [AVRO-3706]: AVDL nested imports cannot be resolved if path contains spaces +- [AVRO-3712]: C++ Build Failure on Manjaro +- [AVRO-3724]: C# JsonEncoder can't handle nested array of records +- [AVRO-3737]: [C] memcheck_test_avro_commons_schema is failing +- [AVRO-3738]: [Build][C#] The release build fails with .NET 7.0 target +- [AVRO-3747]: Make serde `is_human_readable` configurable +- [AVRO-3748]: issue with DataFileSeekableInput.SeekableInputStream.skip +- [AVRO-3749]: incorrect conflicting field when field name starts with symbols +- [AVRO-3751]: FastReaderBuilder in multithread lead to infinite loop also blocking other threads +- [AVRO-3755]: [Rust] Deserialization fails for reader schema with namespace +- [AVRO-3756]: Support writing types back to the user in memory without writing files to disk +- [AVRO-3767]: [Rust] Fix ref resolving in Union +- [AVRO-3772]: [Rust] Deserialize Errors for an Unknown Enum Symbol instead of Returning Default +- [AVRO-3773]: [Ruby] Decimal logical type fail to validate default +- [AVRO-3775]: [Ruby] decimal default is not converted to BigDecimal +- [AVRO-3780]: [Rust] Bug: decimal logical type usage through Fixed schema +- [AVRO-3782]: [Rust] Incorrect decimal resolving +- [AVRO-3785]: [Rust] Deserialization if reader schema has a namespace and a union with null and a record containing a reference type +- [AVRO-3786]: [Rust] Deserialization results in FindUnionVariant error if the writer and reader have the same symbol but at different positions +- [AVRO-3787]: [Rust] Deserialization fails to use default if an enum in a record in a union is given an unknown symbol +- [AVRO-3800]: profile section should be declared in the root package. +- [AVRO-3809]: Faulty validation of a type reference with implicit nested namespace +- [AVRO-3814]: [Rust] Schema resolution fails when extending a nested record with a union type +- [AVRO-3818]: Enclosing namespace should be inherited to the inner named types if they have no their own namespaces +- [AVRO-3820]: Don't allow invalid field names +- [AVRO-3821]: Rust: Record (de?)serialization is sensitive to order of fields in struct +- [AVRO-3823]: Show helpful error messages +- [AVRO-3824]: The instruction for building the website should be more precise +- [AVRO-3827]: Disallow duplicate field names +- [AVRO-3830]: Handle namespace properly if a name starts with dot +- [AVRO-3837]: Disallow invalid namespaces for the Rust binding +- [AVRO-3846]: Race condition can happen among serde tests +- [AVRO-3847]: Record field doesn't accept default value if field type is union and the type of default value is pre-defined name +- [AVRO-3849]: [Rust] "make readme" doesn't work +- [AVRO-3855]: [rust] lint/clippy fails in ubertool +- [AVRO-3858]: [Build] Add some config to ./build.sh sign +- [AVRO-3859]: [Build][C#] build.sh clean fails to remove some C# files +- [AVRO-3861]: [Build] Add RAT exclusions for python docs +- [AVRO-3865]: [Build][perl] Files are leftover after a build +- [AVRO-3866]: [Build][Python] Files are leftover after a build +- [AVRO-3876]: JacksonUtils is not symmetric +- [AVRO-3881]: Writer ignores user metadata when the body is empty +- [AVRO-3888]: CVE with common compress +- [AVRO-3889]: Maven Plugin Always Recompiles IDL Files +- [AVRO-3894]: [Rust] Record field aliases are not taken into account when serializing +- [AVRO-3897]: Disallow invalid namespace in fully qualified name for Rust SDK +- [AVRO-3898]: [rust] compatibility fails with different namespaces +- [AVRO-3899]: [Rust] Invalid logical types should be ignored and treated as the underlying type +- [AVRO-3912]: Issue with deserialization for BigDecimal in rust +- [AVRO-3925]: [Rust]Decimal type serialization/deserialization is incorrect. +- [AVRO-3928]: Avro Rust cannot parse default int logical-type date in a valid schema +- [AVRO-3932]: [C]: fix variable reference in CMakeLists.txt +- [AVRO-3940]: Failed to generate Java classes from multiple .avsc files containing same type +- [AVRO-3953]: C# CodeGen.cs:503 incorrectly throws for "reserved keywords" +- [AVRO-3955]: [Rust] unable to decode string enum from avro encoded data +- [AVRO-3956]: NPE when calling Protocol#equals or hashCode +- [AVRO-3957]: Fix typos in docs and examples +- [AVRO-3964]: [Rust] Out-of-bounds panic +- [AVRO-3970]: [Rust] incorrect compatibility checks with logicalType uuid +- [AVRO-3974]: [Rust] incorrect compatibility checks with ref fields +- [AVRO-3990]: [C++] avrogencpp generates invalid code for union with a reserved word +- [AVRO-4004]: [Rust] Canonical form transformation does not strip the logicalType +- [AVRO-4006]: [Java] DataFileReader does not correctly identify last sync marker when reading/skipping blocks +- [AVRO-4011]: Schema generated via AvroSchema is not compatible with itself +- [AVRO-4014]: [Rust] Sporadic value-schema mismatch with fixed struct + +### New Features + +- [AVRO-3223]: Support optional codecs in C# library +- [AVRO-3358]: Update documentation in DataFileReader +- [AVRO-3388]: Implement extra codecs for C# as seperate nuget packages +- [AVRO-3506]: [rust] Implement Single Object Writer +- [AVRO-3507]: [rust] Implement Single Object Reader +- [AVRO-3591]: Improve interoperability tests with a common test suite +- [AVRO-3592]: [C#] New packages are not included in the build distribution +- [AVRO-3666]: New schema parser for all supported schema formats +- [AVRO-3677]: Introduce Named Schema Formatters +- [AVRO-3678]: [Rust] Support write float value to field defined as double +- [AVRO-3725]: fix documentation of functions and variables +- [AVRO-3764]: [Rust] Add schemata-based resolve method +- [AVRO-3872]: [Build][C#] Warning on nuget upload about README +- [AVRO-3922]: Add timestamp-nanos support to Ruby + +### Improvements + +- [AVRO-312]: Generate documentation for Python with Sphinx +- [AVRO-530]: allow for mutual recursion in type definitions +- [AVRO-1496]: Avro aliases support for C++ +- [AVRO-1514]: Clean up perl API dependencies +- [AVRO-1938]: Python support for generating canonical forms of schema +- [AVRO-2307]: Opt-in setting to improve GC behavior during deserialization? +- [AVRO-2397]: Implement Alias Support for C++ +- [AVRO-2717]: Fix undefined behaviour in ZigZag encoding if Avro was compiled with the C++ standard less than C++20. +- [AVRO-3001]: JsonEncode Decode support for C# +- [AVRO-3043]: Remove redundant generic casts +- [AVRO-3078]: C#: Logical type 'local-timestamp-millis' +- [AVRO-3084]: Fix JavaScript interop test to read files generated by other languages on CI +- [AVRO-3120]: Support Next Java LTS (Java 17) +- [AVRO-3214]: Rust: Support "doc" for FixedSchema +- [AVRO-3245]: Rust: Replace crc crate with crc32fast +- [AVRO-3246]: Rust: Add new codec: bzip2 +- [AVRO-3248]: Rust: Support named types in UnionSchema +- [AVRO-3255]: [Ruby] specify rubygems_mfa_required in gemspec metadata +- [AVRO-3264]: Improve the Avro landing page +- [AVRO-3274]: Request for C# API to implement a JSON Encoder +- [AVRO-3284]: Rust: Upgrade to digest 0.10 +- [AVRO-3285]: Upgrade JavaCC and plugin +- [AVRO-3292]: Bump Microsoft.NET.Test.Sdk from 16.11.0 to 17.0.0 in /lang/csharp +- [AVRO-3302]: Rust: Implement interop tests for the Rust module +- [AVRO-3303]: Rust: Add support for Xz codec +- [AVRO-3306]: Java: Build failure with JDK 18+ +- [AVRO-3312]: Rust: Use u32 instead of i32 for the Enum/Union's index field +- [AVRO-3314]: ArgumentOutOfRangeException thrown in AvroDecimal IConvertable.ToType +- [AVRO-3315]: Rust: Add support to back/cycle reference an alias +- [AVRO-3317]: JavaScript: Update dependencies +- [AVRO-3318]: Java: Bump slf4j.version from 1.7.32 to 1.7.33 in /lang/java +- [AVRO-3319]: Rust: Update zstd requirement from 0.9.0+zstd.1.5.0 to 0.10.0+zstd.1.5.0 in /lang/rust +- [AVRO-3320]: C#: Bump NUnit3TestAdapter from 4.2.0 to 4.2.1 in /lang/csharp +- [AVRO-3321]: Java: Bump commons-cli from 1.4 to 1.5.0 in /lang/java +- [AVRO-3323]: Remove suppression of CS1591 from AvroDecimal +- [AVRO-3324]: Add omitted braces in AvroDecimal +- [AVRO-3325]: Remove suppression of CA2225 in AvroDecimal +- [AVRO-3326]: Styling - Elements should not be on a single line in AvroDecimal +- [AVRO-3327]: Use Pattern Matching to avoid is check followed by cast +- [AVRO-3328]: Documentation update for CodeGen class +- [AVRO-3329]: Add omitted braces in CodeGen class +- [AVRO-3330]: Avrogen avsc compiler should return 0 exit code if help requested +- [AVRO-3333]: Spacing styling issues in CodeGen class +- [AVRO-3334]: Simplify getNullableType in CodeGen +- [AVRO-3335]: Throw exception for null parameter in GenerateNames +- [AVRO-3336]: Deprecate obsolete namespace lookup in CodeGen +- [AVRO-3337]: C#: Bump Log4net to a newer version +- [AVRO-3340]: Enable standard code analysis and Intellisense +- [AVRO-3341]: Update documentation of CodeGenException +- [AVRO-3342]: Update documentation in CodeGenUtil +- [AVRO-3343]: Update codec to styling standards +- [AVRO-3344]: C#: Remove DataBlock class +- [AVRO-3345]: Resolve unnecessary suppression of CA1052 in DataFileConstants +- [AVRO-3346]: Update documentation to meet standards in DataFileReader +- [AVRO-3347]: Update AddNamespace in CodeGen to meet styling guidelines +- [AVRO-3348]: Update ProcessSchemas to meet styling guidelines +- [AVRO-3349]: Update ProcessProtocols to meet styling guidelines +- [AVRO-3352]: Use required minimum package version fo Newtonsoft only +- [AVRO-3353]: Simplify naming in CodeGen +- [AVRO-3354]: Simplify If statements in CodeGen +- [AVRO-3355]: Fix order of Access Modifier in Codec +- [AVRO-3356]: Simplify naming in DataFileReader +- [AVRO-3357]: Properties only assigned in constructors should be marked readonly +- [AVRO-3359]: Updated formatting in DeflateCodec +- [AVRO-3360]: Update Header XML Documentation +- [AVRO-3361]: Simplify if statement in NullCodec +- [AVRO-3366]: Fix naming in GenericEnum +- [AVRO-3367]: Remove unnecessary suppression of CA1307 from GenericEnum +- [AVRO-3377]: Deserialization of record of mangled Java class throws ClassCastException +- [AVRO-3404]: Extend the IDL syntax to serve as a .avsc equivalent as well +- [AVRO-3405]: add API for user-provided metadata when writing to Object Container File +- [AVRO-3407]: Test for user metadata in the interop tests +- [AVRO-3415]: Add C# code coverage support +- [AVRO-3416]: Benchmarking project for C# +- [AVRO-3418]: [Rust] Fix clippy errors for Rust 1.59.0 +- [AVRO-3421]: Add tests for ArraySchema +- [AVRO-3424]: C# Add support to parse string into Schema.Type +- [AVRO-3427]: Add command line option to skip creation of directories based on namespace path +- [AVRO-3434]: .NET/#C: Support LogicalSchema for ReflectReader/Writer +- [AVRO-3435]: Add --version to avrogen +- [AVRO-3450]: Document IDL support in IDEs +- [AVRO-3451]: fix poor Avro write performance +- [AVRO-3453]: C# Avrogen Add Generated Code Attribute +- [AVRO-3464]: Rust: Print user frientlier output for the 'benchmark' example +- [AVRO-3465]: Add avrogen protocol tests +- [AVRO-3467]: Use oracle-actions to test with Early Access JDKs +- [AVRO-3469]: Build and test using .NET SDK 7.0 in guthub action +- [AVRO-3474]: Increase read performance by moving CanRead to constructor +- [AVRO-3475]: Enforce time-millis and time-micros specification +- [AVRO-3477]: Add unit tests for logical types with fixed base type +- [AVRO-3479]: [rust] Derive Avro Schema macro +- [AVRO-3483]: [Rust] Log error messages with a reason when the validation fails +- [AVRO-3484]: Rust: Implement derive default via annotation +- [AVRO-3485]: Rust: Implement derive doc via annotation +- [AVRO-3487]: Java: Bump Jackson to 2.12.6.1 +- [AVRO-3489]: JavaScript: Replace istanbul with nyc for code coverage +- [AVRO-3492]: Rust: Implement derive aliases via annotation +- [AVRO-3496]: Rust: Use visitor.visit_borrowed_str() when possible +- [AVRO-3498]: Deprecate NameCtorKey +- [AVRO-3500]: Rust: Use property based testing for avro_derive IT tests +- [AVRO-3501]: Rust: Enable Github Actions caching for the Rust CI +- [AVRO-3502]: Rust: Wrong [ORDER] for Parsing Canonical Form +- [AVRO-3510]: PHP build fails on Travis +- [AVRO-3517]: Rust: Optimize crates' size by disabling default features of the dependencies +- [AVRO-3518]: Rust: Represent aliases as Name instead of String +- [AVRO-3522]: Rust: Setup better logging and colored stacktraces for the tests +- [AVRO-3526]: Rust: Improve resolving Bytes and Fixed from string +- [AVRO-3527]: Generated equals() and hashCode() for SpecificRecords +- [AVRO-3530]: Rust: Use dependency-review-action for Rust +- [AVRO-3533]: Rust: Update dependencies +- [AVRO-3542]: Scale assignment optimization +- [AVRO-3543]: Support wasm32 compilation target for Rust library +- [AVRO-3547]: support custom attribute at field level +- [AVRO-3554]: Create original art for the Avro logo +- [AVRO-3579]: Java Test : From Junit4 to JUnit5 +- [AVRO-3586]: Make Avro Build Reproducible +- [AVRO-3599]: Rust: Make apache-avro-test-helper releasable +- [AVRO-3600]: [Rust] UnionSchema::new method should be public +- [AVRO-3602]: Support Map(with non-String keys) and Set in ReflectDatumReader +- [AVRO-3608]: Rust: Fix clippy errors in Rust 1.63.0 +- [AVRO-3609]: support custom attributes +- [AVRO-3610]: [C++] Upgrade from C++ 11 to C++ 17 +- [AVRO-3611]: org.apache.avro.util.RandomData generates invalid test data +- [AVRO-3616]: [C++]: Fix compilation warnings +- [AVRO-3621]: [Rust] Improved resolution of nullable record fields +- [AVRO-3623]: Improve the PULL_REQUEST_TEMPLATE +- [AVRO-3624]: Fix Avro website checks on whimsy +- [AVRO-3630]: [Rust] Make it possible to extend pre-existing Avro bytes +- [AVRO-3633]: Additional attributes for 'avro_derive' crate +- [AVRO-3634]: Implement AvroSchemaComponent for bool +- [AVRO-3639]: [Rust] Derive implementation for Eq where possible +- [AVRO-3644]: [JAVA] Support java.util.Optional in reflect package +- [AVRO-3649]: [JAVA] reorder union types to match default value +- [AVRO-3658]: Bump jackson to address CVE-2020-36518 +- [AVRO-3660]: SpecificRecord java data generator helper method - should I contribute? +- [AVRO-3679]: [Rust] Enable 'perf' feature of regex dependency +- [AVRO-3692]: Serde flatten is not supported when deserializing +- [AVRO-3693]: avrogencpp Invalid type for union exception does not identify which union +- [AVRO-3704]: Naming rules : multiple choice +- [AVRO-3705]: avrogencpp needs an option to generate code using std instead of boost +- [AVRO-3708]: [Rust] Fix clippy warnings introduced with Rust 1.67.0 +- [AVRO-3709]: [Rust] Add aliases to RecordField +- [AVRO-3711]: Add documentation about uuid in IDL +- [AVRO-3721]: [Java] Add cache to org.apache.avro.JsonProperties.getObjectProps +- [AVRO-3722]: Eagerly Initialize Instance Variables in Ruby Implementation +- [AVRO-3723]: [Rust] Make schema::ResolvedSchema and schema::Names public +- [AVRO-3727]: Add RollForward to C# avrogen tool +- [AVRO-3741]: Note about the version requirement of Rust in BUILD.md +- [AVRO-3742]: Bump maven-plugin-plugin from 3.8.1 to 3.8.2 +- [AVRO-3743]: Bump cyclonedx-maven-plugin from 2.7.6 to 2.7.7 +- [AVRO-3744]: Bump maven-checkstyle-plugin from 3.2.1 to 3.2.2 +- [AVRO-3745]: Bump zstd-jni from 1.5.4-2 to 1.5.5-2 +- [AVRO-3746]: Bump grpc.version from 1.54.0 to 1.54.1 +- [AVRO-3757]: [rust] Update syn to 2.x +- [AVRO-3758]: [Rust] Use AtomicXyz types instead of static mutable ones +- [AVRO-3759]: [Rust] Schema types inconsistency +- [AVRO-3766]: [Rust] Print friendlier errors when test cases fail +- [AVRO-3771]: [Rust] Logging flood during validate method +- [AVRO-3779]: Any big decimal conversion +- [AVRO-3784]: [Rust] Make Decimal more usable until its rewritten +- [AVRO-3790]: [RUBY] Missing default namespace information in SchemaParseError +- [AVRO-3794]: [Rust] Do not fail the shared tests when the shared folder is not available +- [AVRO-3799]: Enable the schema parser to read and parse from input streams for Rust binding +- [AVRO-3812]: Handle null namespace properly for canonicalized schema representation +- [AVRO-3815]: Broken indentation in the specification doc +- [AVRO-3828]: [Rust] Use newer Github actions for setting up Rust +- [AVRO-3829]: JUnit4 to JUnit5 : continue +- [AVRO-3833]: Spec: clarify usage names and aliases +- [AVRO-3835]: [Rust] Get rid of byteorder and zerocopy dependencies +- [AVRO-3836]: [Rust] Fix the build with Rust 1.65.0 +- [AVRO-3838]: [Rust] Replace regex crate with regex-lite +- [AVRO-3839]: [Rust] Replace lazy_static crate with std::sync::OnceLock +- [AVRO-3844]: [Rust] Fix clippy errors with Rust 1.72.0 +- [AVRO-3851]: Validate default value for record fields and enums on parsing +- [AVRO-3852]: Support Java 21 +- [AVRO-3853]: Support local-timestamp logical types for the Rust SDK +- [AVRO-3862]: Add aliases and doc methods to Schema in Rust SDK +- [AVRO-3863]: Delete temporary test data after tests finish +- [AVRO-3868]: Check consistency between the doc comment in lib.rs and README.md +- [AVRO-3870]: Speed up CI for Rust +- [AVRO-3871]: Add BlockingDirectBinaryEncoder +- [AVRO-3877]: [doc] fix wrong configuration for avro-maven-plugin in java example +- [AVRO-3878]: Rename default git branch to be 'main' +- [AVRO-3879]: [Build][Python] Fix `./build.sh clean` to remove the generated Python documents +- [AVRO-3880]: Upgrade maven-antrun-plugin to 3.1.0 +- [AVRO-3884]: Add local-timestamp-nanos and timestamp-nanos +- [AVRO-3885]: Update the maillist link +- [AVRO-3886]: [Rust] Serialize attribute in schema to support custom logical type +- [AVRO-3887]: Remove redundant casts +- [AVRO-3891]: Remove redundant cast from DirectBinaryDecoder +- [AVRO-3892]: [Rust] support to convert bytes to fixed in resolve_fixed +- [AVRO-3896]: [Rust] support read schema with custom logical type +- [AVRO-3900]: Permissiveness in schema namespaces for rust SDK? +- [AVRO-3901]: [Rust] Better serde union support +- [AVRO-3904]: [rust] Sometimes when calculating schema compatibility the code panics but maybe it should not +- [AVRO-3905]: [Rust] Fix clippy error with Rust 1.74.0 +- [AVRO-3910]: [Rust] Replace `color-backtrace` with `better-panic` for the tests +- [AVRO-3914]: Add nanos support for the Java SDK +- [AVRO-3917]: [Rust] Field aliases are not taken into account when calculating schema compatibility +- [AVRO-3918]: Allow UUID to serialize to Fixed[16] +- [AVRO-3919]: Add UUID type example +- [AVRO-3920]: [Rust] Serialize custom attribute in RecordField +- [AVRO-3923]: Add Avro 1.11.3 release blog +- [AVRO-3927]: [Rust] support custom attributes in list and map +- [AVRO-3935]: Support logical types in Rust Schema Compatibility checks +- [AVRO-3936]: Clean up NOTICE file +- [AVRO-3938]: Schema.Parser.validate should not be null +- [AVRO-3939]: [Rust] Make it possible to use custom schema comparators +- [AVRO-3942]: MemoryOutputStream yields a compiler warning +- [AVRO-3943]: Unused folders +- [AVRO-3948]: [Rust] Re-export bigdecimal::BigDecimal as apache_avro::BigDecimal +- [AVRO-3949]: [Rust]: Add support for serde to apache_avro::Decimal +- [AVRO-3950]: [rust] Some code when checking schema compatibility is never reached +- [AVRO-3958]: Update min CMake version to 3.5 +- [AVRO-3959]: Avoid deprecated OSX atomic ops +- [AVRO-3960]: Fix st ANYARGS warnings +- [AVRO-3961]: Add AVRO_INVALID to avro_type_t +- [AVRO-3962]: [Rust] avro-derive supports extract docs from field comments +- [AVRO-3977]: Fix failing typecheck in Python 3.12 +- [AVRO-3981]: Close SyncableFileOutputStream +- [AVRO-3982]: Use String.isEmpty() instead +- [AVRO-3983]: Allow setting a custom encoder in DataFileWriter +- [AVRO-3985]: Restrict trusted packages in ReflectData and SpecificData +- [AVRO-3992]: [C++] Encoding a record with 0 fields in a vector throws +- [AVRO-3994]: [C++] Solidus (/) should not be escaped in JSON output +- [AVRO-3995]: [C++] Update build system to disallow compiling with unsupported language versions +- [AVRO-3998]: Switch Perl library from JSON::XS to JSON::MaybeXS +- [AVRO-3999]: Avoid warnings in Perl test suite +- [AVRO-4007]: [Rust] Faster is_nullable for UnionSchema +- [AVRO-4010]: Avoid resolving schema on every call to read() +- [AVRO-4013]: PHP 8 Deprecations +- [AVRO-4015]: avro-cpp does not work with CMake's FetchContent +- [AVRO-4016]: Remove the use of MD5 in org.apache.avro.file.DataFileWriter#generateSync +- [AVRO-4019]: [C++] Correct signedness of validator methods +- [AVRO-4022]: Revive docker image + +### Testing + +- [AVRO-3277]: Test against Ruby 3.1 +- [AVRO-3278]: Drop support for Ruby 2.6 +- [AVRO-3558]: Rust: Add a demo crate that shows usage as WebAssembly +- [AVRO-3696]: [Python] Replace tox-wheel with upstream tox 4 +- [AVRO-3697]: Test against Ruby 3.2 +- [AVRO-3701]: Add github action to validate maven 4 build compatibility +- [AVRO-3921]: Test against Ruby 3.3 + +### Wishes + +- [AVRO-1757]: Serialize Avro schema objects to avdl file (IDL format) +- [AVRO-2211]: SchemaBuilder equivalent or other means of schema creation +- [AVRO-3197]: Rust: Disable logical type on failure + +### Tasks + +- [AVRO-3205]: Rust: Update Cargo.toml [package] information +- [AVRO-3241]: [Java] Publish SNAPSHOT artifacts +- [AVRO-3242]: Use TravisCI for testing Apache Avro on Linux ARM64 +- [AVRO-3247]: Rust: Run MIRI checks +- [AVRO-3281]: Bump zstd-jni from 1.5.0-4 to 1.5.1-1 in /lang/java +- [AVRO-3282]: Bump grpc.version from 1.42.1 to 1.43.1 in /lang/java +- [AVRO-3283]: Update zerocopy requirement from 0.3.0 to 0.6.1 in /lang/rust +- [AVRO-3304]: avro-tools Update log4j dependency for critical vulnerability +- [AVRO-3309]: Bump NUnit.ConsoleRunner from 3.13.2 to 3.14.0 in /lang/csharp +- [AVRO-3310]: Bump build-helper-maven-plugin from 3.2.0 to 3.3.0 in /lang/java +- [AVRO-3311]: Bump grpc.version from 1.43.1 to 1.43.2 in /lang/java +- [AVRO-3332]: Java: Bump grpc.version from 1.43.2 to 1.44.0 in /lang/java +- [AVRO-3339]: Rust: Rename crate from avro-rs to apache-avro +- [AVRO-3351]: C#: Bump System.Reflection.Emit.Lightweight from 4.3.0 to 4.7.0 in /lang/csharp +- [AVRO-3372]: Java: Bump archetype-plugin.version from 3.2.0 to 3.2.1 in /lang/java +- [AVRO-3373]: Java: Bump protobuf-java from 3.19.1 to 3.19.4 in /lang/java +- [AVRO-3391]: Update typed-builder requirement from 0.9.1 to 0.10.0 in /lang/rust +- [AVRO-3409]: [Java] Bump Reload4j to 1.2.19 +- [AVRO-3419]: [Rust] Update strum 0.23.1 and strum_macros to 0.24.0 +- [AVRO-3422]: Bump jetty.version from 9.4.44.v20210927 to 9.4.45.v20220203 in /lang/java +- [AVRO-3428]: Rust: Restructure the RUST SDK to a Rust workspace +- [AVRO-3431]: CI: Cancel in-progress workflows if there are new commits in PR +- [AVRO-3432]: Java: Bump grpc.version from 1.44.0 to 1.44.1 in /lang/java +- [AVRO-3437]: Rust: Update dependencies +- [AVRO-3439]: Java: Bump netty-bom from 4.1.72.Final to 4.1.74.Final in /lang/java +- [AVRO-3455]: Java: Bump netty-bom from 4.1.74.Final to 4.1.75.Final +- [AVRO-3456]: Rust: Update zstd requirement from 0.10.0+zstd.1.5.2 to 0.11.0+zstd.1.5.2 +- [AVRO-3457]: JS: Bump mocha from 9.2.1 to 9.2.2 +- [AVRO-3462]: Java: Bump hadoop-client from 3.3.1 to 3.3.2 +- [AVRO-3463]: Java: Bump grpc.version from 1.44.1 to 1.45.0 +- [AVRO-3494]: Rust: uncomment some tests which pass +- [AVRO-3519]: Rust: Remove MIRI Github Actions check +- [AVRO-3552]: Rust: sort the contents in Cargo.toml files with cargo-tomlfmt +- [AVRO-3574]: Rust: Add Cargo.lock to Git +- [AVRO-3575]: Rust: Add a module for fuzzy testing +- [AVRO-3653]: [build] Move off Travis CI +- [AVRO-3661]: [Rust] Fix new clippy errors introduced with Rust 1.65 +- [AVRO-3672]: Add CI testing for Python 3.11 +- [AVRO-3681]: [Python] GitHub actions failing with python 3.6 +- [AVRO-3682]: [Build] Remove forrest from Avro build +- [AVRO-3754]: upgrade to jackson 2.15.0 +- [AVRO-3793]: [Rust] Bump minimum supported version of Rust to 1.65.0 +- [AVRO-3808]: Drop support for Python 3.6, add Pypy 3.8-3.10 +- [AVRO-3875]: [Rust]: Set "readme" metadata for each package separately +- [AVRO-3915]: [Rust] Extract dependencies used by more than one member crates into the workspace +- [AVRO-3937]: [Rust]: Use cargo-deny to check the dependencies' licenses +- [AVRO-3944]: Fix CMake warning +- [AVRO-3945]: Fix issues reported by cppcheck +- [AVRO-3967]: Replace boost::format with fmt +- [AVRO-3978]: Build with Java 11 minimum + +## Language SDK / Convenience artifacts + +* C#: https://www.nuget.org/packages/Apache.Avro/1.12.0 +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.12.0/ +* Javascript: https://www.npmjs.com/package/avro-js/v/1.12.0 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.12.0 +* Ruby: https://rubygems.org/gems/avro/versions/1.12.0 +* Rust: https://crates.io/crates/apache-avro/0.17.0 + +Thanks to everyone for contributing! diff --git a/doc/content/en/blog/releases/avro-1.12.1-released.md b/doc/content/en/blog/releases/avro-1.12.1-released.md new file mode 100755 index 00000000000..8d5c419535e --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.12.1-released.md @@ -0,0 +1,90 @@ +--- +title: "Avro 1.12.1" +linkTitle: "Avro 1.12.1" +date: 2025-10-16 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.12.1! + +All signed release artifacts, signatures and verification instructions can be found }}">here + +## Security Fixes + +This release addresses 4 security fixes: +* Prevent class with empty Java package being trusted by SpecificDatumReader ([#3311](https://github.com/apache/avro/pull/3311)) +* Remove the default serializable packages and deprecated the property to introduce org.apache.avro.SERIALIZABLE_CLASSES instead ([#3376](https://github.com/apache/avro/pull/3376)) +* java-[key-]class allowed packages must be packages ([#3453](https://github.com/apache/avro/pull/3453)) +* [AVRO-4053](https://issues.apache.org/jira/browse/AVRO-4053): doc consistency in velocity templates ([#3150](https://github.com/apache/avro/pull/3150)) + +These fixes apply only to the Java SDK. + + +## Highlights + +### C++ +* [AVRO-4038](https://issues.apache.org/jira/browse/AVRO-4038): Add support local-timestamp-nanos and timestamp-nanos +* [AVRO-4081](https://issues.apache.org/jira/browse/AVRO-4081): Add big decimal support +* [AVRO-4058](https://issues.apache.org/jira/browse/AVRO-4058): Allow custom attributes in arrays +* [AVRO-4120](https://issues.apache.org/jira/browse/AVRO-4120): Allow custom attributes for MAP and FIXED types +* [AVRO-4140](https://issues.apache.org/jira/browse/AVRO-4140): Support uuid to annotate fixed +* [AVRO-3984](https://issues.apache.org/jira/browse/AVRO-3984): Improved code generation for unions + +### C# +* [AVRO-4075](https://issues.apache.org/jira/browse/AVRO-4075): Fix JsonDecoder string type failing to decode ISO string date +* [AVRO-2032](https://issues.apache.org/jira/browse/AVRO-2032): Add support for NaN, Infinity and -Infinity in JsonDecoder + +### Java +* [AVRO-4062](https://issues.apache.org/jira/browse/AVRO-4062): Allow leading underscores for names in idl +* [AVRO-4119](https://issues.apache.org/jira/browse/AVRO-4119): Make Nullable and NotNull annotations configurable +* [AVRO-4039](https://issues.apache.org/jira/browse/AVRO-4039): fix GenericData.newArray to only return an appropriate array implementation +* [AVRO-3940](https://issues.apache.org/jira/browse/AVRO-3940): Allow schema redefinition when equal +* [AVRO-3230](https://issues.apache.org/jira/browse/AVRO-3230): Enable fastread by default +* [AVRO-4133](https://issues.apache.org/jira/browse/AVRO-4133): Support default enum value in Protobuf to Avro +* [AVRO-4165](https://issues.apache.org/jira/browse/AVRO-4165): ability to specify AvroEncode on a class + +### PHP +* [AVRO-2843](https://issues.apache.org/jira/browse/AVRO-2843): PHP submit package on packagist.org +* [AVRO-4046](https://issues.apache.org/jira/browse/AVRO-4046): Handling of default values + + +## Other changes + +These SDKs have upgraded dependencies and minor bugfixes: +* C++ +* C# +* Javascript +* Java +* Python + + +## Language SDK / Convenience artifacts + +* C#: https://www.nuget.org/packages/Apache.Avro/1.12.1 +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.12.1/ +* Javascript: https://www.npmjs.com/package/avro-js/v/1.12.1 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.12.1 +* Ruby: https://rubygems.org/gems/avro/versions/1.12.1 + +Thanks to everyone for contributing! diff --git a/doc/content/en/blog/releases/avro-1.2.0-released.md b/doc/content/en/blog/releases/avro-1.2.0-released.md new file mode 100755 index 00000000000..24fc57ad92e --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.2.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.2.0" +linkTitle: "Avro 1.2.0" +date: 2009-10-15 +--- + + + +Apache Avro 1.2.0 is now available! diff --git a/doc/content/en/blog/releases/avro-1.3.0-released.md b/doc/content/en/blog/releases/avro-1.3.0-released.md new file mode 100755 index 00000000000..dc29d337dc5 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.3.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.3.0" +linkTitle: "Avro 1.3.0" +date: 2010-02-26 +--- + + + +Apache Avro 1.3.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.3.1-released.md b/doc/content/en/blog/releases/avro-1.3.1-released.md new file mode 100755 index 00000000000..f767d50dd49 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.3.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.3.1" +linkTitle: "Avro 1.3.1" +date: 2010-03-19 +--- + + + +Apache Avro 1.3.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.3.2-released.md b/doc/content/en/blog/releases/avro-1.3.2-released.md new file mode 100755 index 00000000000..3a0492df121 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.3.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.3.2" +linkTitle: "Avro 1.3.2" +date: 2010-03-31 +--- + + + +Apache Avro 1.3.2 has been released! diff --git a/doc/content/en/blog/releases/avro-1.3.3-released.md b/doc/content/en/blog/releases/avro-1.3.3-released.md new file mode 100755 index 00000000000..15eeabd4ecf --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.3.3-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.3.3" +linkTitle: "Avro 1.3.3" +date: 2010-07-07 +--- + + + +Apache Avro 1.3.3 has been released! diff --git a/doc/content/en/blog/releases/avro-1.4.0-released.md b/doc/content/en/blog/releases/avro-1.4.0-released.md new file mode 100755 index 00000000000..ca9df708ba5 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.4.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.4.0" +linkTitle: "Avro 1.4.0" +date: 2010-09-08 +--- + + + +Apache Avro 1.4.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.4.1-released.md b/doc/content/en/blog/releases/avro-1.4.1-released.md new file mode 100755 index 00000000000..b2ef0836f84 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.4.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.4.1" +linkTitle: "Avro 1.4.1" +date: 2010-10-13 +--- + + + +Apache Avro 1.4.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.5.0-released.md b/doc/content/en/blog/releases/avro-1.5.0-released.md new file mode 100755 index 00000000000..94daaccc724 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.5.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.5.0" +linkTitle: "Avro 1.5.0" +date: 2011-03-11 +--- + + + +Apache Avro 1.5.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.5.1-released.md b/doc/content/en/blog/releases/avro-1.5.1-released.md new file mode 100755 index 00000000000..24354286ad1 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.5.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.5.1" +linkTitle: "Avro 1.5.1" +date: 2011-05-06 +--- + + + +Apache Avro 1.5.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.5.2-released.md b/doc/content/en/blog/releases/avro-1.5.2-released.md new file mode 100755 index 00000000000..25fb5b7549c --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.5.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.5.2" +linkTitle: "Avro 1.5.2" +date: 2011-08-12 +--- + + + +Apache Avro 1.5.2 has been released! diff --git a/doc/content/en/blog/releases/avro-1.5.3-released.md b/doc/content/en/blog/releases/avro-1.5.3-released.md new file mode 100755 index 00000000000..5be5225a925 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.5.3-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.5.3" +linkTitle: "Avro 1.5.3" +date: 2011-08-29 +--- + + + +Apache Avro 1.5.3 has been released! diff --git a/doc/content/en/blog/releases/avro-1.5.4-released.md b/doc/content/en/blog/releases/avro-1.5.4-released.md new file mode 100755 index 00000000000..8eeab4f4b54 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.5.4-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.5.4" +linkTitle: "Avro 1.5.4" +date: 2011-09-12 +--- + + + +Apache Avro 1.5.4 has been released! diff --git a/doc/content/en/blog/releases/avro-1.6.0-released.md b/doc/content/en/blog/releases/avro-1.6.0-released.md new file mode 100755 index 00000000000..e131f4534f1 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.6.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.6.0" +linkTitle: "Avro 1.6.0" +date: 2011-11-02 +--- + + + +Apache Avro 1.6.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.6.1-released.md b/doc/content/en/blog/releases/avro-1.6.1-released.md new file mode 100755 index 00000000000..724b9b04003 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.6.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.6.1" +linkTitle: "Avro 1.6.1" +date: 2011-11-14 +--- + + + +Apache Avro 1.6.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.6.2-released.md b/doc/content/en/blog/releases/avro-1.6.2-released.md new file mode 100755 index 00000000000..15fae250b4e --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.6.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.6.2" +linkTitle: "Avro 1.6.2" +date: 2012-02-14 +--- + + + +Apache Avro 1.6.2 has been released! diff --git a/doc/content/en/blog/releases/avro-1.6.3-released.md b/doc/content/en/blog/releases/avro-1.6.3-released.md new file mode 100755 index 00000000000..3029f9e4739 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.6.3-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.6.3" +linkTitle: "Avro 1.6.3" +date: 2012-03-19 +--- + + + +Apache Avro 1.6.3 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.0-released.md b/doc/content/en/blog/releases/avro-1.7.0-released.md new file mode 100755 index 00000000000..02e5c15156a --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.0" +linkTitle: "Avro 1.7.0" +date: 2012-06-11 +--- + + + +Apache Avro 1.7.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.1-released.md b/doc/content/en/blog/releases/avro-1.7.1-released.md new file mode 100755 index 00000000000..6ef9278dae3 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.1" +linkTitle: "Avro 1.7.1" +date: 2012-07-18 +--- + + + +Apache Avro 1.7.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.2-released.md b/doc/content/en/blog/releases/avro-1.7.2-released.md new file mode 100755 index 00000000000..94e5719ed0e --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.2" +linkTitle: "Avro 1.7.2" +date: 2012-09-25 +--- + + + +Apache Avro 1.7.2 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.3-released.md b/doc/content/en/blog/releases/avro-1.7.3-released.md new file mode 100755 index 00000000000..63ff58392f8 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.3-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.3" +linkTitle: "Avro 1.7.3" +date: 2012-12-07 +--- + + + +Apache Avro 1.7.3 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.4-released.md b/doc/content/en/blog/releases/avro-1.7.4-released.md new file mode 100755 index 00000000000..a91a8ebc18b --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.4-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.4" +linkTitle: "Avro 1.7.4" +date: 2013-02-26 +--- + + + +Apache Avro 1.7.4 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.5-released.md b/doc/content/en/blog/releases/avro-1.7.5-released.md new file mode 100755 index 00000000000..44288ccf66e --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.5-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.5" +linkTitle: "Avro 1.7.5" +date: 2013-08-19 +--- + + + +Apache Avro 1.7.5 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.6-released.md b/doc/content/en/blog/releases/avro-1.7.6-released.md new file mode 100755 index 00000000000..fe93cd5c2ff --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.6-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.6" +linkTitle: "Avro 1.7.6" +date: 2014-01-22 +--- + + + +Apache Avro 1.7.6 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.7-released.md b/doc/content/en/blog/releases/avro-1.7.7-released.md new file mode 100755 index 00000000000..07a378ec000 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.7-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.7" +linkTitle: "Avro 1.7.7" +date: 2014-07-23 +--- + + + +Apache Avro 1.7.7 has been released! diff --git a/doc/content/en/blog/releases/avro-1.8.0-released.md b/doc/content/en/blog/releases/avro-1.8.0-released.md new file mode 100755 index 00000000000..9ca4a129d1f --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.8.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.8.0" +linkTitle: "Avro 1.8.0" +date: 2016-01-29 +--- + + + +Apache Avro 1.8.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.8.1-released.md b/doc/content/en/blog/releases/avro-1.8.1-released.md new file mode 100755 index 00000000000..ed20e60219d --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.8.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.8.1" +linkTitle: "Avro 1.8.1" +date: 2016-05-19 +--- + + + +Apache Avro 1.8.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.8.2-released.md b/doc/content/en/blog/releases/avro-1.8.2-released.md new file mode 100755 index 00000000000..07720e05053 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.8.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.8.2" +linkTitle: "Avro 1.8.2" +date: 2017-05-20 +--- + + + +Apache Avro 1.8.2 has been released! diff --git a/doc/content/en/blog/releases/avro-1.9.0-released.md b/doc/content/en/blog/releases/avro-1.9.0-released.md new file mode 100755 index 00000000000..0833216066a --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.9.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.9.0" +linkTitle: "Avro 1.9.0" +date: 2019-05-14 +--- + + + +Apache Avro 1.9.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.9.1-released.md b/doc/content/en/blog/releases/avro-1.9.1-released.md new file mode 100755 index 00000000000..6ae614e5b3b --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.9.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.9.1" +linkTitle: "Avro 1.9.1" +date: 2019-09-02 +--- + + + +Apache Avro 1.9.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.9.2-released.md b/doc/content/en/blog/releases/avro-1.9.2-released.md new file mode 100755 index 00000000000..e4580fa42bd --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.9.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.9.2" +linkTitle: "Avro 1.9.2" +date: 2020-02-19 +--- + + + +Apache Avro 1.9.2 has been released! diff --git a/doc/content/en/blog/releases/avro-joins-apache.md b/doc/content/en/blog/releases/avro-joins-apache.md new file mode 100755 index 00000000000..dbc1872644d --- /dev/null +++ b/doc/content/en/blog/releases/avro-joins-apache.md @@ -0,0 +1,28 @@ +--- +title: "Avro joins Apache" +linkTitle: "Avro joins Apache" +date: 2009-04-10 +--- + + + +Avro has joined the Apache Software Foundation as a Hadoop subproject. diff --git a/doc/content/en/community/_index.md b/doc/content/en/community/_index.md new file mode 100644 index 00000000000..643c532589a --- /dev/null +++ b/doc/content/en/community/_index.md @@ -0,0 +1,35 @@ +--- +title: Community +menu: + main: + weight: 40 +aliases: +- /irc.html +- /issue_tracking.html +- /mailing_lists.html +- /mail/ +- /version_control.html +--- + + + + \ No newline at end of file diff --git a/doc/content/en/docs/++version++/Editor Support/_index.md b/doc/content/en/docs/++version++/Editor Support/_index.md new file mode 100644 index 00000000000..2a8f5eb040d --- /dev/null +++ b/doc/content/en/docs/++version++/Editor Support/_index.md @@ -0,0 +1,175 @@ +--- +title: "Editor Support" +linkTitle: "Editor Support" +weight: 209 +--- + + + +## Overview + +When editing an Avro schema, everyone has a preferred editor. Some use an IDE, like IntelliJ, Pycharm, RustRover or +Visual Studio, while others prefer a more humble text editor like Pulsar, Emacs or Vim. + +Most provide help when working with Avro via JSON support (for JSON schemata) by offering syntax highlighting and +formatting. Some provide more help, for example syntax highlighting for IDL schemata, code completion, error +highlighting and more. + +The IDEs and editors that we know to support Avro better than just editing schemata as JSON files are listed below in +alphabetical order. + +## Eclipse + +Eclipse is a free IDE that provides extensions via the [Eclipse Marketplace](https://marketplace.eclipse.org/). + +### Avroclipse + +[Extension](https://marketplace.eclipse.org/content/avroclipse) ([source](https://github.com/dvdkruk/avroclipse)) + +Released in 2015, this plugin received its last update in December 2019. + +Features for IDL files: + +* Syntax Highlighting +* Code Completion +* Error Highlighting + +## Emacs + +Sometimes ridiculed as an operating system, Emacs is a rich text editor. Many packages are available +via one of the three main package archives: [GNU ELPA](https://elpa.gnu.org/), [non-GNU ELPA](https://elpa.nongnu.org/) +or [MELPA](https://melpa.org/). + +### emacs-avro + +[Package source](https://github.com/logc/emacs-avro) + +Originally released elsewhere in 2013, the last update was in March 2021. + +This package is not available via the main package archives. + +Provides syntax highlighting for IDL schemata. + +## JetBrains IDE family + +JetBrains features an entire line of IDEs. They include IntelliJ IDEA, PyCharm, PhpStorm, GoLand, Rider, CLion, +RustRover, WebStorm, RubyMine, +DataGrip, DataSpell, ReSharper, Fleet, and Aqua. + +All of them use the [Plugin Marketplace](https://plugins.jetbrains.com/) to load extensions from. + +### Apache Avro IDL Schema Support + +[Plugin](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support) ([source](https://github.com/opwvhk/avro-schema-support?tab=readme-ov-file#intellij-plugin-for-apache-avro-idl)) + +Released in 2021, this plugin received its last update in April 2025. + +Features for Avro schema & protocol definitions: + +* Syntax Highlighting +* Code Completion +* Code Formatting +* Error Highlighting +* Inspections & quick fixes (IDL only) +* New file templates + +The plugin supports all JetBrains products. + +### Avro and Parquet Viewer + +[Plugin](https://plugins.jetbrains.com/plugin/12281-avro-and-parquet-viewer) ([source](https://github.com/benwatson528/intellij-avro-parquet-plugin))] + +Released in 2021, this plugin received its last update in November 2022. + +Allows previewing `.avro` files, and shows its schema in an editor tab. + +### Big Data File Viewer + +[Plugin](https://plugins.jetbrains.com/plugin/21701-big-data-file-viewer) ( +part of the [Big Data Tools](https://plugins.jetbrains.com/bundles/8-big-data-tools)) + +Released in 2023, this plugin is actively developed by JetBrains. + +Allows previewing `.avro` files, and shows its schema in the structure tool window. + +## Pulsar + +A Chromium-based text editor, Pulsar touts itself as community-led and hyper-hackable. Extensions can be found in +its [Package Repository](https://web.pulsar-edit.dev/). + +### atom-language-avro + +[Package](https://web.pulsar-edit.dev/packages/atom-language-avro) ([source](https://github.com/jonesetc/atom-language-avro)) + +Released in 2015, this package provides syntax highlighting for IDL schemata. + +## Vim + +### vim-avro + +[Plugin source](https://github.com/gurpreetatwal/vim-avro?tab=readme-ov-file#vim-avro) + +Released in December 2016, this plugin has received no updates since. + +Features syntax highlighting for IDL schemata, + +### avro-idl.vim + +[Plugin source](https://github.com/apache/avro/blob/main/share/editors/avro-idl.vim) + +Featured in the Avro repository `share/editors` directory. Introduced in 2010, this plugin received its last update in +June 2019. + +Features syntax highlighting for IDL schemata, + +## Visual Studio Code: + +Being a Microsoft product, Visual Studio Code provides extensions via +a [Plugin Marketplace](https://marketplace.visualstudio.com/). + +### avro-idl + +[Plugin](https://marketplace.visualstudio.com/items?itemName=streetsidesoftware.avro) ([source](https://github.com/streetsidesoftware/vscode-avro-ext?tab=readme-ov-file#avro-syntax-highlighter)) + +Released in 2017, this plugin received its last update in June 2021, + +It provides syntax highlighting for IDL and JSON schemata. + +### avro-tools + +[Plugin](https://marketplace.visualstudio.com/items?itemName=tomaszbartoszewski.avro-tools) ([source](https://github.com/tomaszbartoszewski/vscode-avro-tools?tab=readme-ov-file#avro-tools)) + +Released in 2020, this plugin has not received updates after its release day. + +It does provide more features than most plugins: + +* syntax highlighting for `.avsc` files +* formatting +* snippets for fields with types. + +### avro-viewer + +[Plugin](https://marketplace.visualstudio.com/items?itemName=yasunari89.avro-viewer) ([source](https://github.com/yasunari89/avro-viewer?tab=readme-ov-file#avro-viewer-readme)) + +Released in 2023, this plugin received its last update in June 2023. + +It allows you to preview `.avro` files (uncompressed only), with its schema. diff --git a/doc/content/en/docs/++version++/Editor Support/eclipseAvroclipse.png b/doc/content/en/docs/++version++/Editor Support/eclipseAvroclipse.png new file mode 100644 index 00000000000..1c52d56d22a Binary files /dev/null and b/doc/content/en/docs/++version++/Editor Support/eclipseAvroclipse.png differ diff --git a/doc/content/en/docs/++version++/Editor Support/jetbrainsAvroAndParquetViewer.svg b/doc/content/en/docs/++version++/Editor Support/jetbrainsAvroAndParquetViewer.svg new file mode 100644 index 00000000000..8683ac6a64c --- /dev/null +++ b/doc/content/en/docs/++version++/Editor Support/jetbrainsAvroAndParquetViewer.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/doc/content/en/docs/++version++/Editor Support/jetbrainsAvroIdlSchemaSupport.svg b/doc/content/en/docs/++version++/Editor Support/jetbrainsAvroIdlSchemaSupport.svg new file mode 100644 index 00000000000..9aff5371a66 --- /dev/null +++ b/doc/content/en/docs/++version++/Editor Support/jetbrainsAvroIdlSchemaSupport.svg @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/content/en/docs/++version++/Editor Support/jetbrainsBigDataFileViewer.svg b/doc/content/en/docs/++version++/Editor Support/jetbrainsBigDataFileViewer.svg new file mode 100644 index 00000000000..675b3e7be80 --- /dev/null +++ b/doc/content/en/docs/++version++/Editor Support/jetbrainsBigDataFileViewer.svg @@ -0,0 +1,13 @@ + + + + + + + + + + diff --git a/doc/content/en/docs/++version++/Editor Support/visualStudioAvroIdl.png b/doc/content/en/docs/++version++/Editor Support/visualStudioAvroIdl.png new file mode 100644 index 00000000000..3525834e217 Binary files /dev/null and b/doc/content/en/docs/++version++/Editor Support/visualStudioAvroIdl.png differ diff --git a/doc/content/en/docs/++version++/Editor Support/visualStudioAvroTools.png b/doc/content/en/docs/++version++/Editor Support/visualStudioAvroTools.png new file mode 100644 index 00000000000..ebe675d7ecd Binary files /dev/null and b/doc/content/en/docs/++version++/Editor Support/visualStudioAvroTools.png differ diff --git a/doc/content/en/docs/++version++/Editor Support/visualStudioAvroViewer.png b/doc/content/en/docs/++version++/Editor Support/visualStudioAvroViewer.png new file mode 100644 index 00000000000..0c06035f11d Binary files /dev/null and b/doc/content/en/docs/++version++/Editor Support/visualStudioAvroViewer.png differ diff --git a/doc/content/en/docs/++version++/Getting started (Java)/_index.md b/doc/content/en/docs/++version++/Getting started (Java)/_index.md new file mode 100644 index 00000000000..44aeae4767d --- /dev/null +++ b/doc/content/en/docs/++version++/Getting started (Java)/_index.md @@ -0,0 +1,291 @@ +--- +categories: [] +tags: ["java"] +title: "Getting Started (Java)" +linkTitle: "Getting Started (Java)" +weight: 2 +aliases: +- /docs/current/getting-started-java/ +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Java. This guide only covers using Avro for data serialization; see Patrick Hunt's [Avro RPC Quick Start](https://github.com/phunt/avro-rpc-quickstart) for a good introduction to using Avro for RPC. + +## Download + +Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the [Apache Avroâ„ĸ Download]({{< relref "/project/download" >}}) page. This guide uses Avro {{< avro_version >}}, the latest version at the time of writing. For the examples in this guide, download avro-{{< avro_version >}}.jar and avro-tools-{{< avro_version >}}.jar. + +Alternatively, if you are using Maven, add the following dependency to your POM: + +```xml + + org.apache.avro + avro + {{< avro_version >}} + +``` + +As well as the Avro Maven plugin (for performing code generation): + +```xml + + org.apache.avro + avro-maven-plugin + {{< avro_version >}} + + ${project.basedir}/src/main/avro/ + ${project.basedir}/src/main/java/ + + + + generate-sources + + schema + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + +``` + +You may also build the required Avro jars from source. Building Avro is beyond the scope of this guide; see the Build Documentation page in the wiki for more information. + +## Defining a schema + +Avro schemas are defined using JSON or IDL (the latter requires an extra dependency). Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing with code generation + +### Compiling the schema +Code generation allows us to automatically create classes based on our previously-defined schema. Once we have defined the relevant classes, there is no need to use the schema directly in our programs. We use the avro-tools jar to generate code as follows: + +```shell +java -jar /path/to/avro-tools-{{< avro_version >}}.jar compile schema +``` + +This will generate the appropriate source files in a package based on the schema's namespace in the provided destination folder. For instance, to generate a User class in package example.avro from the schema defined above, run + +```shell +java -jar /path/to/avro-tools-{{< avro_version >}}.jar compile schema user.avsc . +``` + +Note that if you using the Avro Maven plugin, there is no need to manually invoke the schema compiler; the plugin automatically performs code generation on any .avsc files present in the configured source directory. + +### Creating Users +Now that we've completed the code generation, let's create some Users, serialize them to a data file on disk, and then read back the file and deserialize the User objects. + +First let's create some Users and set their fields. + +```java +User user1 = new User(); +user1.setName("Alyssa"); +user1.setFavoriteNumber(256); +// Leave favorite color null + +// Alternate constructor +User user2 = new User("Ben", 7, "red"); + +// Construct via builder +User user3 = User.newBuilder() + .setName("Charlie") + .setFavoriteColor("blue") + .setFavoriteNumber(null) + .build(); +``` + +As shown in this example, Avro objects can be created either by invoking a constructor directly or by using a builder. Unlike constructors, builders will automatically set any default values specified in the schema. Additionally, builders validate the data as it set, whereas objects constructed directly will not cause an error until the object is serialized. However, using constructors directly generally offers better performance, as builders create a copy of the datastructure before it is written. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. Similarly, we set user3's favorite number to null (using a builder requires setting all fields, even if they are null). + +### Serializing +Now let's serialize our Users to disk. + +```java +// Serialize user1, user2 and user3 to disk +DatumWriter userDatumWriter = new SpecificDatumWriter(User.class); +DataFileWriter dataFileWriter = new DataFileWriter(userDatumWriter); +dataFileWriter.create(user1.getSchema(), new File("users.avro")); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.append(user3); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. The SpecificDatumWriter class is used with generated classes and extracts the schema from the specified generated type. + +Next we create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, let's deserialize the data file we just created. + +```java +// Deserialize Users from disk +DatumReader userDatumReader = new SpecificDatumReader(User.class); +DataFileReader dataFileReader = new DataFileReader(file, userDatumReader); +User user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +} +``` + +This snippet will output: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"} +``` + +Deserializing is very similar to serializing. We create a SpecificDatumReader, analogous to the SpecificDatumWriter we used in serialization, which converts in-memory serialized items into instances of our generated class, in this case User. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file and the schema provided by the reader, in this case the User class. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next we use the DataFileReader to iterate through the serialized Users and print the deserialized object to stdout. Note how we perform the iteration: we create a single User object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same User object rather than allocating a new User for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (User user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile # includes code generation via Avro Maven plugin +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain +``` + +### Beta feature: Generating faster code +In release 1.9.0, we introduced a new approach to generating code that speeds up decoding of objects by more than 10% and encoding by more than 30% (future performance enhancements are underway). To ensure a smooth introduction of this change into production systems, this feature is controlled by a feature flag, the system property org.apache.avro.specific.use_custom_coders. In this first release, this feature is off by default. To turn it on, set the system flag to true at runtime. In the sample above, for example, you could enable the fater coders as follows: + +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \ + -Dorg.apache.avro.specific.use_custom_coders=true + +Note that you do not have to recompile your Avro schema to have access to this feature. The feature is compiled and built into your code, and you turn it on and off at runtime using the feature flag. As a result, you can turn it on during testing, for example, and then off in production. Or you can turn it on in production, and quickly turn it off if something breaks. + +We encourage the Avro community to exercise this new feature early to help build confidence. (For those paying on demand for compute resources in the cloud, it can lead to meaningful cost savings.) As confidence builds, we will turn this feature on by default, and eventually eliminate the feature flag (and the old code). + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. + +Let's go over the same example as in the previous section, but without using code generation: we'll create some users, serialize them to a data file on disk, and then read back the file and deserialize the users objects. + +### Creating users +First, we use a SchemaParser to read our schema definition and create a Schema object. + +```java +Schema schema = new SchemaParser().parse(new File("user.avsc")).mainSchema(); +``` + +Using this schema, let's create some users. + +```java +GenericRecord user1 = new GenericData.Record(schema); +user1.put("name", "Alyssa"); +user1.put("favorite_number", 256); +// Leave favorite color null + +GenericRecord user2 = new GenericData.Record(schema); +user2.put("name", "Ben"); +user2.put("favorite_number", 7); +user2.put("favorite_color", "red"); +``` + +Since we're not using code generation, we use GenericRecords to represent users. GenericRecord uses the schema to verify that we only specify valid fields. If we try to set a non-existent field (e.g., user1.put("favorite_animal", "cat")), we'll get an AvroRuntimeException when we run the program. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. + +### Serializing +Now that we've created our user objects, serializing and deserializing them is almost identical to the example above which uses code generation. The main difference is that we use generic instead of specific readers and writers. + +First we'll serialize our users to a data file on disk. + +```java +// Serialize user1 and user2 to disk +File file = new File("users.avro"); +DatumWriter datumWriter = new GenericDatumWriter(schema); +DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); +dataFileWriter.create(schema, file); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. Since we are not using code generation, we create a GenericDatumWriter. It requires the schema both to determine how to write the GenericRecords and to verify that all non-nullable fields are present. + +As in the code generation example, we also create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, we'll deserialize the data file we just created. + +```java +// Deserialize users from disk +DatumReader datumReader = new GenericDatumReader(schema); +DataFileReader dataFileReader = new DataFileReader(file, datumReader); +GenericRecord user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +``` + +This outputs: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +``` + +Deserializing is very similar to serializing. We create a GenericDatumReader, analogous to the GenericDatumWriter we used in serialization, which converts in-memory serialized items into GenericRecords. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file, and the reader's schema provided to the GenericDatumReader. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next, we use the DataFileReader to iterate through the serialized users and print the deserialized object to stdout. Note how we perform the iteration: we create a single GenericRecord object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same record object rather than allocating a new GenericRecord for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (GenericRecord user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile +$ mvn -q exec:java -Dexec.mainClass=example.GenericMain +``` diff --git a/doc/content/en/docs/++version++/Getting started (Python)/_index.md b/doc/content/en/docs/++version++/Getting started (Python)/_index.md new file mode 100644 index 00000000000..cc9a8d77494 --- /dev/null +++ b/doc/content/en/docs/++version++/Getting started (Python)/_index.md @@ -0,0 +1,149 @@ +--- +categories: [] +tags: ["python"] +title: "Getting Started (Python)" +linkTitle: "Getting Started (Python)" +weight: 3 +aliases: +- /docs/current/getting-started-python/ +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Python. This guide only covers using Avro for data serialization; see Patrick Hunt's Avro RPC Quick Start for a good introduction to using Avro for RPC. + +## Notice for Python 3 users +A package called "avro-python3" had been provided to support Python 3 previously, but the codebase was consolidated into the "avro" package and that supports both Python 2 and 3 now. The avro-python3 package will be removed in the near future, so users should use the "avro" package instead. They are mostly API compatible, but there's a few minor difference (e.g., function name capitalization, such as avro.schema.Parse vs avro.schema.parse). + +## Download +For Python, the easiest way to get started is to install it from PyPI. Python's Avro API is available over PyPi. + +```shell +$ python3 -m pip install avro +``` + +The official releases of the Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the Apache Avroâ„ĸ Releases page. This guide uses Avro {{< avro_version >}}, the latest version at the time of writing. Download and unzip avro-{{< avro_version >}}.tar.gz, and install via python setup.py (this will probably require root privileges). Ensure that you can import avro from a Python prompt. + +```shell +$ tar xvf avro-{{< avro_version >}}.tar.gz +$ cd avro-{{< avro_version >}} +$ python setup.py install +$ python +>>> import avro # should not raise ImportError +``` + +Alternatively, you may build the Avro Python library from source. From your the root Avro directory, run the commands + +```shell +$ cd lang/py/ +$ python3 -m pip install -e . +$ python +``` + +## Defining a schema +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item, regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. Note that the Avro Python library does not support code generation. + +Try running the following code snippet, which serializes two users to a data file on disk, and then reads back and deserializes the data file: + +```python +import avro.schema +from avro.datafile import DataFileReader, DataFileWriter +from avro.io import DatumReader, DatumWriter + +schema = avro.schema.parse(open("user.avsc", "rb").read()) + +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +writer.close() + +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +for user in reader: + print(user) +reader.close() +``` + +This outputs: + +```json +{'favorite_color': None, 'favorite_number': 256, 'name': 'Alyssa'} +{'favorite_color': 'red', 'favorite_number': 7, 'name': 'Ben'} +``` + +Do make sure that you open your files in binary mode (i.e. using the modes wb or rb respectively). Otherwise you might generate corrupt files due to automatic replacement of newline characters with the platform-specific representations. + +Let's take a closer look at what's going on here. + +```python +schema = avro.schema.parse(open("user.avsc", "rb").read()) +``` + +avro.schema.parse takes a string containing a JSON schema definition as input and outputs a avro.schema.Schema object (specifically a subclass of Schema, in this case RecordSchema). We're passing in the contents of our user.avsc schema file here. + +```python +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +``` + +We create a DataFileWriter, which we'll use to write serialized items to a data file on disk. The DataFileWriter constructor takes three arguments: + +* The file we'll serialize to +* A DatumWriter, which is responsible for actually serializing the items to Avro's binary format (DatumWriters can be used separately from DataFileWriters, e.g., to perform IPC with Avro). +* The schema we're using. The DataFileWriter needs the schema both to write the schema to the data file, and to verify that the items we write are valid items and write the appropriate fields. + +```python +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +``` + +We use DataFileWriter.append to add items to our data file. Avro records are represented as Python dicts. Since the field favorite_color has type ["string", "null"], we are not required to specify this field, as shown in the first append. Were we to omit the required name field, an exception would be raised. Any extra entries not corresponding to a field are present in the dict are ignored. + +```python +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +``` + +We open the file again, this time for reading back from disk. We use a DataFileReader and DatumReader analagous to the DataFileWriter and DatumWriter above. + +```python +for user in reader: + print(user) +``` + +The DataFileReader is an iterator that returns dicts corresponding to the serialized items. diff --git a/doc/content/en/docs/++version++/IDL Language/_index.md b/doc/content/en/docs/++version++/IDL Language/_index.md new file mode 100644 index 00000000000..448035e5fe6 --- /dev/null +++ b/doc/content/en/docs/++version++/IDL Language/_index.md @@ -0,0 +1,456 @@ +--- +title: "IDL Language" +linkTitle: "IDL Language" +weight: 201 +aliases: +- /docs/current/idl-language/ +--- + + + +## Introduction +This document defines Avro IDL, a higher-level language for authoring Avro schemata. Before reading this document, you should have familiarity with the concepts of schemata and protocols, as well as the various primitive and complex types available in Avro. + +## Overview + +### Purpose +The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python. Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. + +### Usage +Each Avro IDL file defines either a single Avro Protocol, or an Avro Schema with supporting named schemata in a namespace. When parsed, it thus yields either a Protocol or a Schema. These can be respectively written to JSON-format Avro Protocol files with extension .avpr or JSON-format Avro Schema files with extension .avsc. + +To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr +$ head /tmp/namespaces.avpr +{ + "protocol" : "TestNamespace", + "namespace" : "avro.test.protocol", +``` +To convert a _.avdl_ file into a _.avsc_ file, it may be processed by the `idl` tool too. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/schema_syntax_schema.avdl /tmp/schema_syntax.avsc +$ head /tmp/schema_syntax.avsc +{ + "type": "array", + "items": { + "type": "record", + "name": "StatusUpdate", +``` +The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information. + +A Maven plugin is also provided to compile .avdl files. To use it, add something like the following to your pom.xml: +```xml + + + + org.apache.avro + avro-maven-plugin + + + + idl + + + + + + +``` + +## Defining a Schema in Avro IDL +An Avro IDL file consists of exactly one (main) schema definition. The minimal schema is defined by the following code: +```java +schema int; +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type": "int" +} +``` +More complex schemata can also be defined, for example by adding named schemata like this: +```java +namespace default.namespace.for.named.schemata; +schema Message; + +record Message { + string? title = null; + string message; +} +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type" : "record", + "name" : "Message", + "namespace" : "default.namespace.for.named.schemata", + "fields" : [ { + "name" : "title", + "type" : [ "null", "string" ], + "default": null + }, { + "name" : "message", + "type" : "string" + } ] +} +``` +Schemata in Avro IDL can contain the following items: + +* Imports of external protocol and schema files (only named schemata are imported). +* Definitions of named schemata, including records, errors, enums, and fixeds. + +## Defining a Protocol in Avro IDL +An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined by the following code: +```java +protocol MyProtocol { +} +``` +This is equivalent to (and generates) the following JSON protocol definition: +```json +{ +"protocol" : "MyProtocol", + "types" : [ ], + "messages" : { + } +} +``` +The namespace of the protocol may be changed using the @namespace annotation: +```java +@namespace("mynamespace") +protocol MyProtocol { +} +``` +This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, as will be described later in this document. + +Protocols in Avro IDL can contain the following items: + +* Imports of external protocol and schema files. +* Definitions of named schemata, including records, errors, enums, and fixeds. +* Definitions of RPC messages + +## Imports +Files may be imported in one of three formats: + +* An IDL file may be imported with a statement like: + + `import idl "foo.avdl";` + +* A JSON protocol file may be imported with a statement like: + + `import protocol "foo.avpr";` + +* A JSON schema file may be imported with a statement like: + + `import schema "foo.avsc";` + +When importing into an IDL schema file, only (named) types are imported into this file. When importing into an IDL protocol, messages are imported into the protocol as well. + +Imported file names are resolved relative to the current IDL file. + +## Defining an Enumeration +Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. This default is only used when an incompatible symbol is read. It is not used if the enum field is missing. + +Example Writer Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE, OVAL +} +``` +Example Reader Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE +} = CIRCLE; +``` +In the above example, the reader will use the default value of `CIRCLE` whenever reading data written with the `OVAL` symbol of the writer. Also note that, unlike the JSON format, anonymous enums cannot be defined. + +## Defining a Fixed Length Field +Fixed fields are defined using the following syntax: +``` +fixed MD5(16); +``` +This example defines a fixed-length type called MD5, which contains 16 bytes. + +## Defining Records and Errors +Records are defined in Avro IDL using a syntax similar to a struct definition in C: +```java +record Employee { + string name; + boolean active = true; + long salary; +} +``` +The above example defines a record with the name “Employee” with three fields. + +To define an error, simply use the keyword _error_ instead of _record_. For example: +```java +error Kaboom { + string explanation; + int result_code = -1; +} +``` +Each field in a record or error consists of a type and a name, optional property annotations and an optional default value. + +A type reference in Avro IDL must be one of: + +* A primitive type +* A logical type +* A named schema (either defined or imported) +* A complex type (array, map, or union) + +### Primitive Types +The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_. + +### Logical Types +Some of the logical types supported by Avro's JSON format are directly supported by Avro IDL. The currently supported types are: + +* _decimal_ (logical type [decimal]({{< relref "../specification#decimal" >}})) +* _date_ (logical type [date]({{< relref "../specification#date" >}})) +* _time_ms_ (logical type [time-millis]({{< relref "../specification#time-millisecond-precision" >}})) +* _timestamp_ms_ (logical type [timestamp-millis]({{< relref "../specification#timestamp-millisecond-precision" >}})) +* _local_timestamp_ms_ (logical type [local-timestamp-millis]({{< relref "../specification#local_timestamp_ms" >}})) +* _uuid_ (logical type [uuid]({{< relref "../specification#uuid" >}})) + +For example: +```java +record Job { + string jobid; + date submitDate; + time_ms submitTime; + timestamp_ms finishTime; + decimal(9,2) finishRatio; + uuid pk = "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"; +} +``` + +Logical types can also be specified via an annotation, which is useful for logical types for which a keyword does not exist: + +```java +record Job { + string jobid; + @logicalType("timestamp-micros") + long finishTime; +} +``` + +### References to Named Schemata +If a named schema has already been defined in the same Avro IDL file, it may be referenced by name as if it were a primitive type: +```java +record Card { + Suit suit; // refers to the enum Card defined above + int number; +} +``` + +### Default Values +Default values for fields may be optionally specified by using an equals sign after the field name followed by a JSON expression indicating the default value. This JSON is interpreted as described in the [spec]({{< relref "../specification#schema-record" >}}). + +### Complex Types + +#### Arrays +Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of any type t is denoted `array`. For example, an array of strings is denoted `array`, and a multidimensional array of Foo records would be `array>`. + +#### Maps +Map types are written similarly to array types. An array that contains values of type t is written `map`. As in the JSON schema format, all maps contain `string`-type keys. + +#### Unions +Union types are denoted as `union { typeA, typeB, typeC, ... }`. For example, this record contains a string field that is optional (unioned with null), and a field containing either a precise or a imprecise number: +```java +record RecordWithUnion { + union { null, string } optionalString; + union { decimal(12, 6), float } number; +} +``` +Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a union may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. + +Because it occurs so often, there is a special shorthand to denote a union of `null` with one other schema. The first three fields in the following snippet have identical schemata, as do the last two fields: + +```java +record RecordWithUnion { + union { null, string } optionalString1 = null; + string? optionalString2 = null; + string? optionalString3; // No default value + + union { string, null } optionalString4 = "something"; + string? optionalString5 = "something else"; +} +``` + +Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So all fields are valid in the example above. + +## Defining RPC Messages +The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message _add_ which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: +```java +int add(int foo, int bar = 0); +``` +Message arguments, like record fields, may specify default values. + +To define a message with no response, you may use the alias _void_, equivalent to the Avro _null_ type: +```java +void logMessage(string message); +``` +If you have defined or imported an error type within the same protocol, you may declare that a message can throw this error using the syntax: +```java +void goKaboom() throws Kaboom; +``` +To define a one-way message, use the keyword `oneway` after the parameter list, for example: +```java +void fireAndForget(string message) oneway; +``` + +## Other Language Features + +### Comments and documentation +All Java-style comments are supported within a Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines. + +Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment. + +### Escaping Identifiers +Occasionally, one may want to distinguish between identifiers and languages keywords. In order to do so, backticks (`) may be used to escape +the identifier. For example, to define a message with the literal name error, you may write: +```java +void `error`(); +``` +This syntax is allowed anywhere an identifier is expected. + +### Annotations for Ordering and Namespaces +Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. These can be custom properties, or +special properties as used in the JSON-format Avro Schema and Protocol files. + +For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows: +```java +record MyRecord { + string @order("ascending") myAscendingSortField; + string @order("descending") myDescendingField; + string @order("ignore") myIgnoredField; +} +``` +A field's type (with the exception of type references) may also be preceded by annotations, e.g.: +```java +record MyRecord { + @java-class("java.util.ArrayList") array myStrings; +} +``` +This can be used to support java classes that can be serialized/deserialized via their `toString`/`String constructor`, e.g.: +```java +record MyRecord { + @java-class("java.math.BigDecimal") string value; + @java-key-class("java.io.File") map fileStates; + array<@java-class("java.math.BigDecimal") string> weights; +} +``` +Similarly, a `@namespace` annotation may be used to modify the namespace when defining a named schema. For example: +```java +@namespace("org.apache.avro.firstNamespace") +protocol MyProto { + @namespace("org.apache.avro.someOtherNamespace") + record Foo {} + + record Bar {} +} +``` +will define a protocol in the _firstNamespace_ namespace. The record _Foo_ will be defined in _someOtherNamespace_ and _Bar_ will be defined in _firstNamespace_ as it inherits its default from its container. + +Type and field aliases are specified with the `@aliases` annotation as follows: +```java +@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"]) +record MyRecord { + string @aliases(["oldField", "ancientField"]) myNewField; +} +``` +Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. You can use any identifier or series of identifiers separated by dots and/or dashes as property name. + +## Complete Example +The following is an example of two Avro IDL files that together show most of the above features: + +### schema.avdl +```java +/* + * Header with license information. + */ +// Optional default namespace (if absent, the default namespace is the null namespace). +namespace org.apache.avro.test; +// Optional main schema definition; if used, the IDL file is equivalent to a .avsc file. +schema TestRecord; + +/** Documentation for the enum type Kind */ +@aliases(["org.foo.KindOf"]) +enum Kind { + FOO, + BAR, // the bar enum value + BAZ +} = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. + +/** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ +fixed MD5(16); + +record TestRecord { + /** Record name; has no intrinsic order */ + string @order("ignore") name; + + Kind @order("descending") kind; + + MD5 hash; + + /* + Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. + */ + union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + // Shorthand syntax; the null in this union is placed based on the default value (or first is there's no default). + MD5? anotherNullableHash = null; + + array arrayOfLongs; +} +``` + +### protocol.avdl +```java +/* + * Header with license information. + */ + +/** + * An example protocol in Avro IDL + */ +@namespace("org.apache.avro.test") +protocol Simple { + // Import the example file above + import idl "schema.avdl"; + + /** Errors are records that can be thrown from a method */ + error TestError { + string message; + } + + string hello(string greeting); + /** Return what was given. Demonstrates the use of backticks to name types/fields/messages/parameters after keywords */ + TestRecord echo(TestRecord `record`); + int add(int arg1, int arg2); + bytes echoBytes(bytes data); + void `error`() throws TestError; + // The oneway keyword forces the method to return null. + void ping() oneway; +} +``` + +Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory. diff --git a/doc/content/en/docs/++version++/MapReduce guide/_index.md b/doc/content/en/docs/++version++/MapReduce guide/_index.md new file mode 100644 index 00000000000..86f776fb420 --- /dev/null +++ b/doc/content/en/docs/++version++/MapReduce guide/_index.md @@ -0,0 +1,398 @@ +--- +title: "MapReduce guide" +linkTitle: "MapReduce guide" +weight: 200 +aliases: +- /docs/current/mapreduce-guide/ +--- + + + +Avro provides a convenient way to represent complex data structures within a Hadoop MapReduce job. Avro data can be used as both input to and output from a MapReduce job, as well as the intermediate format. The example in this guide uses Avro data for all three, but it's possible to mix and match; for instance, MapReduce can be used to aggregate a particular field in an Avro record. + +This guide assumes basic familiarity with both Hadoop MapReduce and Avro. See the [Hadoop documentation](https://hadoop.apache.org/docs/current/) and the [Avro getting started guide](./getting-started-java/) for introductions to these projects. This guide uses the old MapReduce API (`org.apache.hadoop.mapred`) and the new MapReduce API (`org.apache.hadoop.mapreduce`). + +## Setup +The code from this guide is included in the Avro docs under examples/mr-example. The example is set up as a Maven project that includes the necessary Avro and MapReduce dependencies and the Avro Maven plugin for code generation, so no external jars are needed to run the example. In particular, the POM includes the following dependencies: +```xml + + org.apache.avro + avro + {{< avro_version >}} + + + org.apache.avro + avro-mapred + {{< avro_version >}} + + + org.apache.hadoop + hadoop-client + 3.1.2 + +``` +And the following plugin: +```xml + + org.apache.avro + avro-maven-plugin + {{< avro_version >}} + + + generate-sources + + schema + + + ${project.basedir}/../ + ${project.basedir}/target/generated-sources/ + + + + +``` + +If you do not configure the *sourceDirectory* and *outputDirectory* properties, the defaults will be used. The *sourceDirectory* property defaults to *src/main/avro*. The *outputDirectory* property defaults to *target/generated-sources*. You can change the paths to match your project layout. + +Alternatively, Avro jars can be downloaded directly from the Apache Avroâ„ĸ Releases [page](https://avro.apache.org/releases.html). The relevant Avro jars for this guide are *avro-{{< avro_version >}}.jar* and *avro-mapred-{{< avro_version >}}.jar*, as well as *avro-tools-{{< avro_version >}}.jar* for code generation and viewing Avro data files as JSON. In addition, you will need to install Hadoop in order to use MapReduce. + +## Example: ColorCount +Below is a simple example of a MapReduce that uses Avro. There is an example for both the old (org.apache.hadoop.mapred) and new (org.apache.hadoop.mapreduce) APIs under *examples/mr-example/src/main/java/example/*. _MapredColorCount_ is the example for the older mapred API while _MapReduceColorCount_ is the example for the newer mapreduce API. Both examples are below, but we will detail the mapred API in our subsequent examples. + +MapredColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.*; +import org.apache.avro.Schema.Type; +import org.apache.avro.mapred.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.*; + +import example.avro.User; + +public class MapredColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } + } + + public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapredColorCount "); + return -1; + } + + JobConf conf = new JobConf(getConf(), MapredColorCount.class); + conf.setJobName("colorcount"); + + FileInputFormat.setInputPaths(conf, new Path(args[0])); + FileOutputFormat.setOutputPath(conf, new Path(args[1])); + + AvroJob.setMapperClass(conf, ColorCountMapper.class); + AvroJob.setReducerClass(conf, ColorCountReducer.class); + + // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set + // relevant config options such as input/output format, map output + // classes, and output key class. + AvroJob.setInputSchema(conf, User.getClassSchema()); + AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); + + JobClient.runJob(conf); + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args); + System.exit(res); + } +} +``` + +MapReduceColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.avro.mapreduce.AvroJob; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.avro.mapreduce.AvroKeyValueOutputFormat; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import example.avro.User; + +public class MapReduceColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } + + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapReduceColorCount "); + return -1; + } + + Job job = new Job(getConf()); + job.setJarByClass(MapReduceColorCount.class); + job.setJobName("Color Count"); + + FileInputFormat.setInputPaths(job, new Path(args[0])); + FileOutputFormat.setOutputPath(job, new Path(args[1])); + + job.setInputFormatClass(AvroKeyInputFormat.class); + job.setMapperClass(ColorCountMapper.class); + AvroJob.setInputKeySchema(job, User.getClassSchema()); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + job.setOutputFormatClass(AvroKeyValueOutputFormat.class); + job.setReducerClass(ColorCountReducer.class); + AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); + AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); + + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new MapReduceColorCount(), args); + System.exit(res); + } +} +``` +ColorCount reads in data files containing *User* records, defined in _examples/user.avsc_, and counts the number of instances of each favorite color. (This example draws inspiration from the canonical _WordCount_ MapReduce application.) This example uses the old MapReduce API. See MapReduceAvroWordCount, found under _doc/examples/mr-example/src/main/java/example/_ to see the new MapReduce API example. The User schema is defined as follows: +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` +This schema is compiled into the *User* class used by *ColorCount* via the Avro Maven plugin (see _examples/mr-example/pom.xml_ for how this is set up). + +*ColorCountMapper* essentially takes a *User* as input and extracts the User's favorite color, emitting the key-value pair ``. _ColorCountReducer_ then adds up how many occurrences of a particular favorite color were emitted, and outputs the result as a Pair record. These Pairs are serialized to an Avro data file. + +## Running ColorCount +The _ColorCount_ application is provided as a Maven project in the Avro docs under _examples/mr-example_. To build the project, including the code generation of the User schema, run: +```shell +mvn compile +``` +Next, run _GenerateData_ from `examples/mr-examples` to create an Avro data file, `input/users.avro`, containing 20 Users with favorite colors chosen randomly from a list: +```shell +mvn exec:java -q -Dexec.mainClass=example.GenerateData +``` +Besides creating the data file, GenerateData prints the JSON representations of the Users generated to stdout, for example: +```json +{"name": "user", "favorite_number": null, "favorite_color": "red"} +{"name": "user", "favorite_number": null, "favorite_color": "green"} +{"name": "user", "favorite_number": null, "favorite_color": "purple"} +{"name": "user", "favorite_number": null, "favorite_color": null} +... +``` +Now we're ready to run ColorCount. We specify our freshly-generated input folder as the input path and output as our output folder (note that MapReduce will not start a job if the output folder already exists): +```shell +mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output" +``` +Once ColorCount completes, checking the contents of the new output directory should yield the following: +```shell +$ ls output/ +part-00000.avro _SUCCESS +``` +You can check the contents of the generated Avro file using the avro-tools jar: +```shell +$ java -jar /path/to/avro-tools-{{< avro_version >}}.jar tojson output/part-00000.avro +{"value": 3, "key": "blue"} +{"value": 7, "key": "green"} +{"value": 1, "key": "none"} +{"value": 2, "key": "orange"} +{"value": 3, "key": "purple"} +{"value": 2, "key": "red"} +{"value": 2, "key": "yellow"} +``` +Now let's go over the ColorCount example in detail. + +## AvroMapper - org.apache.hadoop.mapred API + +The easiest way to use Avro data files as input to a MapReduce job is to subclass `AvroMapper`. An `AvroMapper` defines a `map` function that takes an Avro datum as input and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountMapper is an AvroMapper that takes a User as input and outputs a `Pair>`, where the CharSequence key is the user's favorite color and the Integer value is 1. +```java +public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } +} +``` +In order to use our AvroMapper, we must call AvroJob.setMapperClass and AvroJob.setInputSchema. +```java +AvroJob.setMapperClass(conf, ColorCountMapper.class); +AvroJob.setInputSchema(conf, User.getClassSchema()); +``` +Note that `AvroMapper` does not implement the `Mapper` interface. Under the hood, the specified Avro data files are deserialized into AvroWrappers containing the actual data, which are processed by a Mapper that calls the configured AvroMapper's map function. AvroJob.setInputSchema sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setMapperClass`, `JobConf.setInputFormat`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Mapper - org.apache.hadoop.mapreduce API +This document will not go into all the differences between the mapred and mapreduce APIs, however will describe the main differences. As you can see, ColorCountMapper is now a subclass of the Hadoop Mapper class and is passed an AvroKey as it's key. Additionally, the AvroJob method calls were slightly changed. +```java + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } +``` + +## AvroReducer - org.apache.hadoop.mapred API +Analogously to AvroMapper, an AvroReducer defines a reducer function that takes the key/value types output by an AvroMapper (or any mapper that outputs Pairs) and outputs a key/value pair represented a Pair record. In the ColorCount example, ColorCountReducer is an AvroReducer that takes the CharSequence key representing a favorite color and the `Iterable` representing the counts for that color (they should all be 1 in this example) and adds up the counts. +```java +public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } +} +``` +In order to use our AvroReducer, we must call AvroJob.setReducerClass and AvroJob.setOutputSchema. +```java +AvroJob.setReducerClass(conf, ColorCountReducer.class); +AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); +``` +Note that _AvroReducer_ does not implement the _Reducer_ interface. The intermediate Pairs output by the mapper are split into _AvroKeys_ and _AvroValues_, which are processed by a Reducer that calls the configured AvroReducer's `reduce` function. `AvroJob.setOutputSchema` sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setReducerClass`, `JobConf.setOutputFormat`, `JobConf.setOutputKeyClass`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Reduce - org.apache.hadoop.mapreduce API +As before we not detail every difference between the APIs. As with the _Mapper_ change _ColorCountReducer_ is now a subclass of _Reducer_ and _AvroKey_ and _AvroValue_ are emitted. Additionally, the _AvroJob_ method calls were slightly changed. +```java + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } +``` + +## Learning more +The mapred API allows users to mix Avro AvroMappers and AvroReducers with non-Avro Mappers and Reducers and the mapreduce API allows users input Avro and output non-Avro or vice versa. + +The mapred package has API org.apache.avro.mapred documentation as does the `org.apache.avro.mapreduce` package. MapReduce API (`org.apache.hadoop.mapreduce`). Similarily to the mapreduce package, it's possible with the mapred API to implement your own Mappers and Reducers directly using the public classes provided in these libraries. See the `AvroWordCount` application, found under _examples/mr-example/src/main/java/example/AvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the old MapReduce API. See the `MapReduceAvroWordCount` application, found under _examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the new MapReduce API. diff --git a/doc/content/en/docs/++version++/SASL profile/_index.md b/doc/content/en/docs/++version++/SASL profile/_index.md new file mode 100644 index 00000000000..6676d585c15 --- /dev/null +++ b/doc/content/en/docs/++version++/SASL profile/_index.md @@ -0,0 +1,95 @@ +--- +title: "SASL profile" +linkTitle: "SASL profile" +weight: 202 +aliases: +- /docs/current/sasl-profile/ +--- + + + +## Introduction +SASL ([RFC 2222](https://www.ietf.org/rfc/rfc2222.txt)) provides a framework for authentication and security of network protocols. Each protocol that uses SASL is meant to define a SASL profile. This document provides a SASL profile for connection-based Avro RPC. + +## Overview +SASL negotiation proceeds as a series of message interactions over a connection between a client and server using a selected SASL mechanism. The client starts this negotiation by sending its chosen mechanism name with an initial (possibly empty) message. Negotiation proceeds with the exchange of messages until either side indicates success or failure. The content of the messages is mechanism-specific. If the negotiation succeeds, then the session can proceed over the connection, otherwise it must be abandoned. + +Some mechanisms continue to process session data after negotiation (e.g., encrypting it), while some specify that further session data is transmitted unmodified. + +## Negotiation + +### Commands +Avro SASL negotiation uses four one-byte commands. + +* 0: START Used in a client's initial message. +* 1: CONTINUE Used while negotiation is ongoing. +* 2: FAIL Terminates negotiation unsuccessfully. +* 3: COMPLETE Terminates negotiation successfully. + +The format of a START message is: + +`| 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |` + +The format of a CONTINUE message is: + +`| 1 | 4-byte payload length | payload data |` + +The format of a FAIL message is: + +`| 2 | 4-byte message length | UTF-8 message |` + +The format of a COMPLETE message is: + +`| 3 | 4-byte payload length | payload data |` + +### Process +Negotiation is initiated by a client sending a START command containing the client's chosen mechanism name and any mechanism-specific payload data. + +The server and client then interchange some number (possibly zero) of CONTINUE messages. Each message contains payload data that is processed by the security mechanism to generate the next message. + +Once either the client or server send a FAIL message then negotiation has failed. UTF-8-encoded text is included in the failure message. Once either a FAIL message has been sent or received, or any other error occurs in the negotiation, further communication on this connection must cease. + +Once either the client or server send a COMPLETE message then negotiation has completed successfully. Session data may now be transmitted over the connection until it is closed by either side. + +## Session Data +If no SASL QOP (quality of protection) is negotiated, then all subsequent writes to/reads over this connection are written/read unmodified. In particular, messages use Avro [framing](#Message+Framing), and are of the form: + +`| 4-byte frame length | frame data | ... | 4 zero bytes |` + +If a SASL QOP is negotiated, then it must be used by the connection for all subsequent messages. This is done by wrapping each non-empty frame written using the security mechanism and unwrapping each non-empty frame read. The length written in each non-empty frame is the length of the wrapped data. Complete frames must be passed to the security mechanism for unwrapping. Unwrapped data is then passed to the application as the content of the frame. + +If at any point processing fails due to wrapping, unwrapping or framing errors, then all further communication on this connection must cease. + +## Anonymous Mechanism +The SASL anonymous mechanism ([RFC 2245](https://www.ietf.org/rfc/rfc2222.txt)) is quite simple to implement. In particular, an initial anonymous request may be prefixed by the following static sequence: + +`| 0 | 0009 | ANONYMOUS | 0000 |` + +If a server uses the anonymous mechanism, it should check that the mechanism name in the start message prefixing the first request received is 'ANONYMOUS', then simply prefix its initial response with a COMPLETE message of: + +`| 3 | 0000 |` + +If an anonymous server recieves some other mechanism name, then it may respond with a FAIL message as simple as: + +`| 2 | 0000 |` + +Note that the anonymous mechanism need add no additional round-trip messages between client and server. The START message can be piggybacked on the initial request and the COMPLETE or FAIL message can be piggybacked on the initial response. diff --git a/doc/content/en/docs/++version++/Specification/_index.md b/doc/content/en/docs/++version++/Specification/_index.md new file mode 100755 index 00000000000..950bae11762 --- /dev/null +++ b/doc/content/en/docs/++version++/Specification/_index.md @@ -0,0 +1,898 @@ +--- +title: "Specification" +linkTitle: "Specification" +weight: 4 +date: 2021-10-25 +aliases: +- spec.html +- /docs/current/specification/ +--- + + + +## Introduction +This document defines Apache Avro. It is intended to be the authoritative specification. Implementations of Avro must adhere to this document. + +## Schema Declaration {#schema-declaration} +A Schema is represented in [JSON](https://www.json.org/) by one of: + +* A JSON string, naming a defined type. +* A JSON object, of the form: +```js +{"type": "typeName", ...attributes...} +``` +where _typeName_ is either a primitive or derived type name, as defined below. Attributes not defined in this document are permitted as metadata, but must not affect the format of serialized data. +* A JSON array, representing a union of embedded types. + +## Primitive Types +The set of primitive type names is: + +* _null_: no value +* _boolean_: a binary value +* _int_: 32-bit signed integer +* _long_: 64-bit signed integer +* _float_: single precision (32-bit) IEEE 754 floating-point number +* _double_: double precision (64-bit) IEEE 754 floating-point number +* _bytes_: sequence of 8-bit unsigned bytes +* _string_: unicode character sequence + +Primitive types have no specified attributes. + +Primitive type names are also defined type names. Thus, for example, the schema "string" is equivalent to: +```json +{"type": "string"} +``` + +## Complex Types +Avro supports six kinds of complex types: _records_, _enums_, _arrays_, _maps_, _unions_ and _fixed_. + +### Records {#schema-record} +Records use the type name "record" and support the following attributes: + +* _name_: a JSON string providing the name of the record (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _aliases_: a JSON array of strings, providing alternate names for this record (optional). +* _fields_: a JSON array, listing fields (required). Each field is a JSON object with the following attributes: + * _name_: a JSON string providing the name of the field (required), and + * _doc_: a JSON string describing this field for users (optional). + * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above + * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. + * _aliases_: a JSON array of strings, providing alternate names for this field (optional). + * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema that matches in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. + +*field default values* + +| **avro type** | **json type** | **example** | +|---------------|----------------|-------------| +| null | null | `null` | +| boolean | boolean | `true` | +| int,long | integer | `1` | +| float,double | number | `1.1` | +| bytes | string | `"\u00FF"` | +| string | string | `"foo"` | +| record | object | `{"a": 1}` | +| enum | string | `"FOO"` | +| array | array | `[1]` | +| map | object | `{"a": 1}` | +| fixed | string | `"\u00ff"` | + +For example, a linked-list of 64-bit values may be defined with: +```jsonc +{ + "type": "record", + "name": "LongList", + "aliases": ["LinkedLongs"], // old name for this + "fields" : [ + {"name": "value", "type": "long"}, // each element has a long + {"name": "next", "type": ["null", "LongList"]} // optional next element + ] +} +``` + +### Enums +Enums use the type name "enum" and support the following attributes: + +* _name_: a JSON string providing the name of the enum (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _symbols_: a JSON array, listing symbols, as JSON strings (required). All symbols in an enum must be unique; duplicates are prohibited. Every symbol must match the regular expression [A-Za-z_][A-Za-z0-9_]* (the same requirement as for [names]({{< ref "#names" >}} "Names")). +* _default_: A default value for this enumeration, used during resolution when the reader encounters a symbol from the writer that isn't defined in the reader's schema (optional). The value provided here must be a JSON string that's a member of the symbols array. See documentation on schema resolution for how this gets used. + +For example, playing card suits might be defined with: +```json +{ + "type": "enum", + "name": "Suit", + "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"], + "default" : "CLUBS" +} +``` + +### Arrays +Arrays use the type name "array" and support a single attribute: + +* _items_: the schema of the array's items. + +For example, an array of strings is declared with: +```json +{ + "type": "array", + "items" : "string", +} +``` + +### Maps +Maps use the type name "map" and support one attribute: + +* _values_: the schema of the map's values. + +Map keys are assumed to be strings. + +For example, a map from string to long is declared with: +```json +{ + "type": "map", + "values" : "long", +} +``` + +### Unions +Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string. + +(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match with one element of the union. + +Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.) + +Unions may not immediately contain other unions. + +### Fixed +Fixed uses the type name "fixed" and supports the following attributes: + +* _name_: a string naming this fixed (required). +* _namespace_, a string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _size_: an integer, specifying the number of bytes per value (required). + +For example, 16-byte quantity may be declared with: +```json +{"type": "fixed", "size": 16, "name": "md5"} +``` + +### Names +Record, enums and fixed are named types. Each has a fullname that is composed of two parts: a name and a namespace, separated by a dot. Equality of names is defined on the fullname – it is an error to specify two different types with the same name. + +Record fields and enum symbols have names as well (but no namespace). Equality of field names and enum symbols is defined within their scope (the record/enum that defines them). It is an error to define multiple fields or enum symbols with the same name in a single type. Fields and enum symbols across scopes are never equal, so field names and enum symbols can be reused in a different type. + +The name portion of the fullname of named types, record field names, and enum symbols must: + +* start with [A-Za-z_] +* subsequently contain only [A-Za-z0-9_] + +A namespace is a dot-separated sequence of such names. The empty string may also be used as a namespace to indicate the null namespace. Equality of names (including field names and enum symbols) as well as fullnames is case-sensitive. + +The null namespace may not be used in a dot-separated sequence of names. So the grammar for a namespace is: +``` + | [()*] +``` + +In record, enum and fixed definitions, the fullname is determined according to the algorithm below the example: + +``` +{ + "type": "record", + "name": "Example", + "doc": "A simple name (attribute) and no namespace attribute: use the null namespace (\"\"); the fullname is 'Example'.", + "fields": [ + { + "name": "inheritNull", + "type": { + "type": "enum", + "name": "Simple", + "doc": "A simple name (attribute) and no namespace attribute: inherit the null namespace of the enclosing type 'Example'. The fullname is 'Simple'.", + "symbols": ["a", "b"] + } + }, { + "name": "explicitNamespace", + "type": { + "type": "fixed", + "name": "Simple", + "namespace": "explicit", + "doc": "A simple name (attribute) and a namespace (attribute); the fullname is 'explicit.Simple' (this is a different type than of the 'inheritNull' field).", + "size": 12 + } + }, { + "name": "fullName", + "type": { + "type": "record", + "name": "a.full.Name", + "namespace": "ignored", + "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", + "fields": [ + { + "name": "inheritNamespace", + "type": { + "type": "enum", + "name": "Understanding", + "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.", + "symbols": ["d", "e"] + } + } + ] + } + } + ] +} +``` + +The fullname of a record, enum or fixed definition is determined by the required `name` and optional `namespace` attributes like this: + +* A fullname is specified. If the name specified contains a dot, then it is assumed to be a fullname, and any namespace also specified is ignored. For example, use "name": "org.foo.X" to indicate the fullname org.foo.X. +* A simple name (a name that contains no dots) and namespace are both specified. For example, one might use "name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X. +* A simple name only is specified (a name that contains no dots). In this case the namespace is taken from the most tightly enclosing named schema or protocol, and the fullname is constructed from that namespace and the name. For example, if "name": "X" is specified, and this occurs within a field of the record definition of org.foo.Y, then the fullname is org.foo.X. This also happens if there is no enclosing namespace (i.e., the enclosing schema definition has the null namespace). + +References to previously defined names are as in the latter two cases above: if they contain a dot they are a fullname, if they do not contain a dot, the namespace is the namespace of the enclosing definition. + +Primitive type names (`null`, `boolean`, `int`, `long`, `float`, `double`, `bytes`, `string`) have no namespace and their names may not be defined in any namespace. + +Complex types (`record`, `enum`, `array`, `map`, `fixed`) have no namespace, but their names (as well as `union`) are permitted to be reused as type names. This can be confusing to the human reader, but is always unambiguous for binary serialization. Due to the limitations of JSON encoding, it is a best practice to use a namespace when using these names. + +A schema or protocol may not contain multiple definitions of a fullname. Further, a name must be defined before it is used ("before" in the depth-first, left-to-right traversal of the JSON parse tree, where the types attribute of a protocol is always deemed to come "before" the messages attribute.) + +### Aliases +Named types and fields may have aliases. An implementation may optionally use aliases to map a writer's schema to the reader's. This facilitates both schema evolution as well as processing disparate datasets. + +Aliases function by re-writing the writer's schema using aliases from the reader's schema. For example, if the writer's schema was named "Foo" and the reader's schema is named "Bar" and has an alias of "Foo", then the implementation would act as though "Foo" were named "Bar" when reading. Similarly, if data was written as a record with a field named "x" and is read as a record with a field named "y" with alias "x", then the implementation would act as though "x" were named "y" when reading. + +A type alias may be specified either as a fully namespace-qualified, or relative to the namespace of the name it is an alias for. For example, if a type named "a.b" has aliases of "c" and "x.y", then the fully qualified names of its aliases are "a.c" and "x.y". + +Aliases are alternative names, and thus subject to the same uniqueness constraints as names. Aliases should be valid names, but this is not required: any string is accepted as an alias. When aliases are used "to map a writer's schema to the reader's" (see above), this allows schema evolution to correct illegal names in old schemata. + +## Fixing an invalid, but previously accepted, schema +Over time, rules and validations on schemas have changed. It is therefore possible that a schema used to work with an older version of Avro, but now fails to parse. + +This can have several reasons, as listed below. Each reason also describes a fix, which can be applied using [schema resolution]({{< ref "#schema-resolution" >}}): you fix the problems in the schema in a way that is compatible, and then you can use the new schema to read the old data. + +### Invalid names +Invalid names of types and fields can be corrected by renaming (using an [alias]({{< ref "#aliases" >}})). This works for simple names, namespaces and fullnames. + +This fix is twofold: first, you add the invalid name as an alias to the type/field. Then, you change the name to any valid name. + +### Invalid defaults +Default values are only used to fill in missing data when reading. Invalid defaults create invalid values in these cases. The fix is to correct the default values. + + +## Data Serialization and Deserialization +Binary encoded Avro data does not include type information or field names. The benefit is that the serialized data is small, but as a result a schema must always be used in order to read Avro data correctly. The best way to ensure that the schema is structurally identical to the one used to write the data is to use the exact same schema. + +Therefore, files or systems that store Avro data should always include the writer's schema for that data. Avro-based remote procedure call (RPC) systems must also guarantee that remote recipients of data have a copy of the schema used to write that data. In general, it is advisable that any reader of Avro data should use a schema that is the same (as defined more fully in [Parsing Canonical Form for Schemas]({{< ref "#parsing-canonical-form-for-schemas" >}} "Parsing Canonical Form for Schemas")) as the schema that was used to write the data in order to deserialize it correctly. Deserializing data into a newer schema is accomplished by specifying an additional schema, the results of which are described in [Schema Resolution]({{< ref "#schema-resolution" >}}). + +In general, both serialization and deserialization proceed as a depth-first, left-to-right traversal of the schema, serializing or deserializing primitive types as they are encountered. Therefore, it is possible, though not advisable, to read Avro data with a schema that does not have the same Parsing Canonical Form as the schema with which the data was written. In order for this to work, the serialized primitive values must be compatible, in order value by value, with the items in the deserialization schema. For example, int and long are always serialized the same way, so an int could be deserialized as a long. Since the compatibility of two schemas depends on both the data and the serialization format (eg. binary is more permissive than JSON because JSON includes field names, eg. a long that is too large will overflow an int), it is simpler and more reliable to use schemas with identical Parsing Canonical Form. + +### Encodings +Avro specifies two serialization encodings: binary and JSON. Most applications will use the binary encoding, as it is smaller and faster. But, for debugging and web-based applications, the JSON encoding may sometimes be appropriate. + +### Binary Encoding {#binary-encoding} +Binary encoding does not include field names, self-contained information about the types of individual bytes, nor field or record separators. Therefore readers are wholly reliant on the schema used when the data was encoded. + +#### Primitive Types +Primitive types are encoded in binary as follows: + +* _null_ is written as zero bytes. +* a _boolean_ is written as a single byte whose value is either 0 (false) or 1 (true). +* _int_ and _long_ values are written using [variable-length](https://lucene.apache.org/java/3_5_0/fileformats.html#VInt) [zig-zag](https://code.google.com/apis/protocolbuffers/docs/encoding.html#types) coding. Some examples: + +| *value* | *hex* | +|---|---| +| 0 | 00 | +|-1 | 01 | +| 1 | 02 | +|-2 | 03 | +| 2 | 04 | +|...|...| +|-64 | 7f | +|64 | 80 01| +|...|...| + +* a _float_ is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java's [floatToRawIntBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Float.html#floatToRawIntBits-float-) and then encoded in little-endian format. +* a _double_ is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java's [doubleToRawLongBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#doubleToRawLongBits-double-) and then encoded in little-endian format. +* _bytes_ are encoded as a long followed by that many bytes of data. +* a _string_ is encoded as a long followed by that many bytes of UTF-8 encoded character data. +For example, the three-character string "foo" would be encoded as the long value 3 (encoded as hex 06) followed by the UTF-8 encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f): +``` +06 66 6f 6f +``` + +### Complex Types +Complex types are encoded in binary as follows: + +#### Records +A record is encoded by encoding the values of its fields in the order that they are declared. In other words, a record is encoded as just the concatenation of the encodings of its fields. Field values are encoded per their schema. + +For example, the record schema +```json +{ + "type": "record", + "name": "test", + "fields" : [ + {"name": "a", "type": "long"}, + {"name": "b", "type": "string"} + ] +} +``` + +An instance of this record whose a field has value 27 (encoded as hex 36) and whose b field has value "foo" (encoded as hex bytes 06 66 6f 6f), would be encoded simply as the concatenation of these, namely the hex byte sequence: +``` +36 06 66 6f 6f +``` + +#### Enums +An enum is encoded by a int, representing the zero-based position of the symbol in the schema. + +For example, consider the enum: +```json +{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] } +``` + +This would be encoded by an int between zero and three, with zero indicating "A", and 3 indicating "D". + +#### Arrays +Arrays are encoded as a series of blocks. Each block consists of a long count value, followed by that many array items. A block with count zero indicates the end of the array. Each item is encoded per the array's item schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a long block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +For example, the array schema +```json +{"type": "array", "items": "long"} +``` +an array containing the items 3 and 27 could be encoded as the long value 2 (encoded as hex 04) followed by long values 3 and 27 (encoded as hex 06 36) terminated by zero: +``` +04 06 36 00 +``` + +The blocked representation permits one to read and write arrays larger than can be buffered in memory, since one can start writing items without knowing the full length of the array. + +#### Maps {#schema-maps} +Maps are encoded as a series of _blocks_. Each block consists of a `long` _count_ value, followed by that many key/value pairs. A block with count zero indicates the end of the map. Each item is encoded per the map's value schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a `long` block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +The blocked representation permits one to read and write maps larger than can be buffered in memory, since one can start writing items without knowing the full length of the map. + +#### Unions +A union is encoded by first writing an `int` value indicating the zero-based position within the union of the schema of its value. The value is then encoded per the indicated schema within the union. + +For example, the union schema `["null","string"]` would encode: + +* _null_ as zero (the index of "null" in the union): +`00` +* the string "a" as one (the index of "string" in the union, 1, encoded as hex 02), followed by the serialized string: +`02 02 61` +NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. In reality, we don't expect unions with 215M members + +#### Fixed +Fixed instances are encoded using the number of bytes declared in the schema. + +### JSON Encoding +Except for unions, the JSON encoding is the same as is used to encode [field default values]({{< ref "#schema-record" >}}). + +The value of a union is encoded in JSON as follows: + +* if its type is _null_, then it is encoded as a JSON _null_; +* otherwise it is encoded as a JSON object with one name/value pair whose name is the type's name and whose value is the recursively encoded value. For Avro's named types (record, fixed or enum) the user-specified name is used, for other types the type name is used. + +For example, the union schema `["null","string","Foo"]`, where Foo is a record name, would encode: + +* _null_ as _null_; +* the string "a" as `{"string": "a"}` and +* a Foo instance as `{"Foo": {...}}`, where `{...}` indicates the JSON encoding of a Foo instance. + +Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc. + +### Single-object encoding +In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic. + +In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively. + +#### Single object encoding specification +Single Avro objects are encoded as follows: + +1. A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1). +1. The 8-byte little-endian CRC-64-AVRO [fingerprint]({{< ref "#schema-fingerprints" >}} "Schema fingerprints") of the object's schema. +1. The Avro object encoded using [Avro's binary encoding]({{< ref "#binary-encoding" >}}). + +Implementations use the 2-byte marker to determine whether a payload is Avro. This check helps avoid expensive lookups that resolve the schema from a fingerprint, when the message is not an encoded Avro payload. + +## Sort Order +Avro defines a standard sort order for data. This permits data written by one system to be efficiently sorted by another system. This can be an important optimization, as sort order comparisons are sometimes the most frequent per-object operation. Note also that Avro binary-encoded data can be efficiently ordered without deserializing it to objects. + +Data items may only be compared if they have identical schemas. Pairwise comparisons are implemented recursively with a depth-first, left-to-right traversal of the schema. The first mismatch encountered determines the order of the items. + +Two items with the same schema are compared according to the following rules. + +* _null_ data is always equal. +* _boolean_ data is ordered with false before true. +* _int_, _long_, _float_ and _double_ data is ordered by ascending numeric value. +* _bytes_ and fixed data are compared lexicographically by unsigned 8-bit values. +* _string_ data is compared lexicographically by Unicode code point. Note that since UTF-8 is used as the binary encoding for strings, sorting of bytes and string binary data is identical. +* _array_ data is compared lexicographically by element. +* _enum_ data is ordered by the symbol's position in the enum schema. For example, an enum whose symbols are `["z", "a"]` would sort "z" values before "a" values. +* _union_ data is first ordered by the branch within the union, and, within that, by the type of the branch. For example, an `["int", "string"]` union would order all int values before all string values, with the ints and strings themselves ordered as defined above. +* _record_ data is ordered lexicographically by field. If a field specifies that its order is: + * "ascending", then the order of its values is unaltered. + * "descending", then the order of its values is reversed. + * "ignore", then its values are ignored when sorting. +* _map_ data may not be compared. It is an error to attempt to compare data containing maps unless those maps are in an `"order":"ignore"` record field. + +## Object Container Files +Avro includes a simple object container file format. A file has a schema, and all objects stored in the file must be written according to that schema, using binary encoding. Objects are stored in blocks that may be compressed. Synchronization markers are used between blocks to permit efficient splitting of files for MapReduce processing. + +Files may include arbitrary user-specified metadata. + +A file consists of: + +* A file header, followed by +* one or more file data blocks. + +A file header consists of: + +* Four bytes, ASCII 'O', 'b', 'j', followed by 1. +* file metadata, including the schema. +* The 16-byte, randomly-generated sync marker for this file. + +File metadata is written as if defined by the following [map]({{< ref "#schema-maps" >}}) schema: +```json +{"type": "map", "values": "bytes"} +``` +All metadata properties that start with "avro." are reserved. The following file metadata properties are currently used: + +* **avro.schema** contains the schema of objects stored in the file, as JSON data (required). +* **avro.codec** the name of the compression codec used to compress blocks, as a string. Implementations are required to support the following codecs: "null" and "deflate". If codec is absent, it is assumed to be "null". The codecs are described with more detail below. + +A file header is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.Header", + "fields" : [ + {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, + {"name": "meta", "type": {"type": "map", "values": "bytes"}}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +A file data block consists of: + +* A long indicating the count of objects in this block. +* A long indicating the size in bytes of the serialized objects in the current block, after any codec is applied +* The serialized objects. If a codec is specified, this is compressed by that codec. +* The file's 16-byte sync marker. + +A file data block is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.DataBlock", + "fields" : [ + {"name": "count", "type": "long"}, + {"name": "data", "type": "bytes"}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +Each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enable detection of corrupt blocks and help ensure data integrity. + +### Required Codecs + +_null_ + +The "null" codec simply passes through data uncompressed. + +_deflate_ + +The "deflate" codec writes the data block using the deflate algorithm as specified in [RFC 1951](https://www.isi.edu/in-notes/rfc1951.txt), and typically implemented using the zlib library. Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum. + +### Optional Codecs +_bzip2_ + +The "bzip2" codec uses the [bzip2](https://sourceware.org/bzip2/) compression library. + +_snappy_ + +The "snappy" codec uses Google's [Snappy](https://code.google.com/p/snappy/) compression library. Each compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in the block. + +_xz_ + +The "xz" codec uses the [XZ](https://tukaani.org/xz/) compression library. + +_zstandard_ + +The "zstandard" codec uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. + +### Protocol Declaration +Avro protocols describe RPC interfaces. Like schemas, they are defined with JSON text. + +A protocol is a JSON object with the following attributes: + +* _protocol_, a string, the name of the protocol (required); +* _namespace_, an optional string that qualifies the name (optional); +* _doc_, an optional string describing this protocol; +* _types_, an optional list of definitions of named types (records, enums, fixed and errors). An error definition is just like a record definition except it uses "error" instead of "record". Note that forward references to named types are not permitted. +* _messages_, an optional JSON object whose keys are message names and whose values are objects whose attributes are described below. No two messages may have the same name. + +The name and namespace qualification rules defined for schema objects apply to protocols as well. + +### Messages +A message has attributes: + +* a _doc_, an optional description of the message, +* a _request_, a list of named, typed parameter schemas (this has the same form as the fields of a record declaration); +* a _response_ schema; +* an optional union of declared error schemas. The effective union has "string" prepended to the declared union, to permit transmission of undeclared "system" errors. For example, if the declared error union is `["AccessError"]`, then the effective union is `["string", "AccessError"]`. When no errors are declared, the effective error union is `["string"]`. Errors are serialized using the effective union; however, a protocol's JSON declaration contains only the declared union. +* an optional one-way boolean parameter. + +A request parameter list is processed equivalently to an anonymous record. Since record field lists may vary between reader and writer, request parameters may also differ between the caller and responder, and such differences are resolved in the same manner as record field differences. + +The one-way parameter may only be true when the response type is `"null"` and no errors are listed. + +### Sample Protocol +For example, one may define a simple HelloWorld protocol with: +```json +{ + "namespace": "com.acme", + "protocol": "HelloWorld", + "doc": "Protocol Greetings", + + "types": [ + {"name": "Greeting", "type": "record", "fields": [ + {"name": "message", "type": "string"}]}, + {"name": "Curse", "type": "error", "fields": [ + {"name": "message", "type": "string"}]} + ], + + "messages": { + "hello": { + "doc": "Say hello.", + "request": [{"name": "greeting", "type": "Greeting" }], + "response": "Greeting", + "errors": ["Curse"] + } + } +} +``` + +## Protocol Wire Format + +### Message Transport +Messages may be transmitted via different transport mechanisms. + +To the transport, a _message_ is an opaque byte sequence. + +A transport is a system that supports: + +* **transmission of request messages** +* **receipt of corresponding response messages** +Servers may send a response message back to the client corresponding to a request message. The mechanism of correspondence is transport-specific. For example, in HTTP it is implicit, since HTTP directly supports requests and responses. But a transport that multiplexes many client threads over a single socket would need to tag messages with unique identifiers. + +Transports may be either stateless or stateful. In a stateless transport, messaging assumes no established connection state, while stateful transports establish connections that may be used for multiple messages. This distinction is discussed further in the [handshake](#handshake) section below. + +#### HTTP as Transport +When [HTTP](https://www.w3.org/Protocols/rfc2616/rfc2616.html) is used as a transport, each Avro message exchange is an HTTP request/response pair. All messages of an Avro protocol should share a single URL at an HTTP server. Other protocols may also use that URL. Both normal and error Avro response messages should use the 200 (OK) response code. The chunked encoding may be used for requests and responses, but, regardless the Avro request and response are the entire content of an HTTP request and response. The HTTP Content-Type of requests and responses should be specified as "avro/binary". Requests should be made using the POST method. + +HTTP is used by Avro as a stateless transport. + +### Message Framing +Avro messages are _framed_ as a list of buffers. + +Framing is a layer between messages and the transport. It exists to optimize certain operations. + +The format of framed message data is: + +* a series of buffers, where each buffer consists of: + * a four-byte, big-endian _buffer length_, followed by + * that many bytes of _buffer_ data. +* a message is always terminated by a zero-length buffer. + +Framing is transparent to request and response message formats (described below). Any message may be presented as a single or multiple buffers. + +Framing can permit readers to more efficiently get different buffers from different sources and for writers to more efficiently store different buffers to different destinations. In particular, it can reduce the number of times large binary objects are copied. For example, if an RPC parameter consists of a megabyte of file data, that data can be copied directly to a socket from a file descriptor, and, on the other end, it could be written directly to a file descriptor, never entering user space. + +A simple, recommended, framing policy is for writers to create a new segment whenever a single binary object is written that is larger than a normal output buffer. Small objects are then appended in buffers, while larger objects are written as their own buffers. When a reader then tries to read a large object the runtime can hand it an entire buffer directly, without having to copy it. + +### Handshake +The purpose of the handshake is to ensure that the client and the server have each other's protocol definition, so that the client can correctly deserialize responses, and the server can correctly deserialize requests. Both clients and servers should maintain a cache of recently seen protocols, so that, in most cases, a handshake will be completed without extra round-trip network exchanges or the transmission of full protocol text. + +RPC requests and responses may not be processed until a handshake has been completed. With a stateless transport, all requests and responses are prefixed by handshakes. With a stateful transport, handshakes are only attached to requests and responses until a successful handshake response has been returned over a connection. After this, request and response payloads are sent without handshakes for the lifetime of that connection. + +The handshake process uses the following record schemas: +```json +{ + "type": "record", + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +{ + "type": "record", + "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", + "type": {"type": "enum", "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", + "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +``` + +* A client first prefixes each request with a `HandshakeRequest` containing just the hash of its protocol and of the server's protocol (`clientHash!=null, clientProtocol=null, serverHash!=null`), where the hashes are 128-bit MD5 hashes of the JSON protocol text. If a client has never connected to a given server, it sends its hash as a guess of the server's hash, otherwise it sends the hash that it previously obtained from this server. +The server responds with a HandshakeResponse containing one of: + * `match=BOTH, serverProtocol=null, serverHash=null` if the client sent the valid hash of the server's protocol and the server knows what protocol corresponds to the client's hash. In this case, the request is complete and the response data immediately follows the HandshakeResponse. + * `match=CLIENT, serverProtocol!=null, serverHash!=null` if the server has previously seen the client's protocol, but the client sent an incorrect hash of the server's protocol. The request is complete and the response data immediately follows the HandshakeResponse. The client must use the returned protocol to process the response and should also cache that protocol and its hash for future interactions with this server. + * `match=NONE` if the server has not previously seen the client's protocol. The serverHash and serverProtocol may also be non-null if the server's protocol hash was incorrect. +In this case the client must then re-submit its request with its protocol text (`clientHash!=null, clientProtocol!=null, serverHash!=null`) and the server should respond with a successful match (match=BOTH, serverProtocol=null, serverHash=null) as above. + +The meta field is reserved for future handshake enhancements. + +### Call Format +A _call_ consists of a request message paired with its resulting response or error message. Requests and responses contain extensible metadata, and both kinds of messages are framed as described above. + +The format of a call request is: + +* _request metadata_, a map with values of type bytes +* the _message name_, an Avro string, followed by +* the _message parameters_. Parameters are serialized according to the message's request declaration. +When the empty string is used as a message name a server should ignore the parameters and return an empty response. A client may use this to ping a server or to perform a handshake without sending a protocol message. + +When a message is declared one-way and a stateful connection has been established by a successful handshake response, no response data is sent. Otherwise the format of the call response is: + +* _response metadata_, a map with values of type bytes +* a one-byte error _flag_ boolean, followed by either: + * if the error flag is false, the message _response_, serialized per the message's response schema. + * if the error flag is true, the _error_, serialized per the message's effective error union schema. + +### Schema Resolution {#schema-resolution} +A reader of Avro data, whether from an RPC or a file, can always parse that data because the original schema must be provided along with the data. However, the reader may be programmed to read data into a different schema. For example, if the data was written with a different version of the software than it is read, then fields may have been added or removed from records. This section specifies how such schema differences should be resolved. + +We refer to the schema used to write the data as the writer's schema, and the schema that the application expects the reader's schema. Differences between these should be resolved as follows: + +* It is an error if the two schemas do not _match_. +To match, one of the following must hold: + * both schemas are arrays whose item types match + * both schemas are maps whose value types match + * both schemas are enums whose (unqualified) names match + * both schemas are fixed whose sizes and (unqualified) names match + * both schemas are records with the same (unqualified) name + * either schema is a union + * both schemas have same primitive type + * the writer's schema may be promoted to the reader's as follows: + * int is promotable to long, float, or double + * long is promotable to float or double + * float is promotable to double + * string is promotable to bytes + * bytes is promotable to string +* **if both are records**: + * the ordering of fields may be different: fields are matched by name. + * schemas for fields with the same name in both records are resolved recursively. + * if the writer's record contains a field with a name not present in the reader's record, the writer's value for that field is ignored. + * if the reader's record schema has a field that contains a default value, and writer's schema does not have a field with the same name, then the reader should use the default value from its field. + * if the reader's record schema has a field with no default value, and writer's schema does not have a field with the same name, an error is signalled. +* **if both are enums**: +if the writer's symbol is not present in the reader's enum and the reader has a default value, then that value is used, otherwise an error is signalled. + +* **if both are arrays**: +This resolution algorithm is applied recursively to the reader's and writer's array item schemas. + +* **if both are maps**: +This resolution algorithm is applied recursively to the reader's and writer's value schemas. + +* **if both are unions**: +The first schema in the reader's union that matches the selected writer's union schema is recursively resolved against it. if none match, an error is signalled. + +* **if reader's is a union, but writer's is not** +The first schema in the reader's union that matches the writer's schema is recursively resolved against it. If none match, an error is signalled. + +* **if writer's is a union, but reader's is not** +If the reader's schema matches the selected writer's schema, it is recursively resolved against it. If they do not match, an error is signalled. + +A schema's _doc_ fields are ignored for the purposes of schema resolution. Hence, the _doc_ portion of a schema may be dropped at serialization. + +### Parsing Canonical Form for Schemas {#parsing-canonical-form-for-schemas} +One of the defining characteristics of Avro is that a reader must use the schema used by the writer of the data in order to know how to read the data. This assumption results in a data format that's compact and also amenable to many forms of schema evolution. However, the specification so far has not defined what it means for the reader to have the "same" schema as the writer. Does the schema need to be textually identical? Well, clearly adding or removing some whitespace to a JSON expression does not change its meaning. At the same time, reordering the fields of records clearly does change the meaning. So what does it mean for a reader to have "the same" schema as a writer? + +Parsing Canonical Form is a transformation of a writer's schema that let's us define what it means for two schemas to be "the same" for the purpose of reading data written against the schema. It is called Parsing Canonical Form because the transformations strip away parts of the schema, like "doc" attributes, that are irrelevant to readers trying to parse incoming data. It is called Canonical Form because the transformations normalize the JSON text (such as the order of attributes) in a way that eliminates unimportant differences between schemas. If the Parsing Canonical Forms of two different schemas are textually equal, then those schemas are "the same" as far as any reader is concerned, i.e., there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writing using the other original schema. (We sketch a proof of this property in a companion document.) + +The next subsection specifies the transformations that define Parsing Canonical Form. But with a well-defined canonical form, it can be convenient to go one step further, transforming these canonical forms into simple integers ("fingerprints") that can be used to uniquely identify schemas. The subsection after next recommends some standard practices for generating such fingerprints. + +#### Transforming into Parsing Canonical Form +Assuming an input schema (in JSON form) that's already UTF-8 text for a _valid_ Avro schema (including all quotes as required by JSON), the following transformations will produce its Parsing Canonical Form: + +* [PRIMITIVES] Convert primitive schemas to their simple form (e.g., int instead of `{"type":"int"}`). +* [FULLNAMES] Replace short names with fullnames, using applicable namespaces to do so. Then eliminate namespace attributes, which are now redundant. +* [STRIP] Keep only attributes that are relevant to parsing data, which are: _type_, _name_, _fields_, _symbols_, _items_, _values_, _size_. Strip all others (e.g., _doc_ and _aliases_). +* [ORDER] Order the appearance of fields of JSON objects as follows: _name_, _type_, _fields_, _symbols_, _items_, _values_, _size_. For example, if an object has _type_, _name_, and _size_ fields, then the _name_ field should appear first, followed by the _type_ and then the _size_ fields. +* [STRINGS] For all JSON string literals in the schema text, replace any escaped characters (e.g., \uXXXX escapes) with their UTF-8 equivalents. +* [INTEGERS] Eliminate quotes around and any leading zeros in front of JSON integer literals (which appear in the _size_ attributes of _fixed_ schemas). +* [WHITESPACE] Eliminate all whitespace in JSON outside of string literals. + +#### Schema Fingerprints {#schema-fingerprints} +"[A] fingerprinting algorithm is a procedure that maps an arbitrarily large data item (such as a computer file) to a much shorter bit string, its fingerprint, that uniquely identifies the original data for all practical purposes" (quoted from [Wikipedia](https://en.wikipedia.org/wiki/Fingerprint_(computing))). In the Avro context, fingerprints of Parsing Canonical Form can be useful in a number of applications; for example, to cache encoder and decoder objects, to tag data items with a short substitute for the writer's full schema, and to quickly negotiate common-case schemas between readers and writers. + +In designing fingerprinting algorithms, there is a fundamental trade-off between the length of the fingerprint and the probability of collisions. To help application designers find appropriate points within this trade-off space, while encouraging interoperability and ease of implementation, we recommend using one of the following three algorithms when fingerprinting Avro schemas: + +* When applications can tolerate longer fingerprints, we recommend using the [SHA-256 digest algorithm](https://en.wikipedia.org/wiki/SHA-2) to generate 256-bit fingerprints of Parsing Canonical Forms. Most languages today have SHA-256 implementations in their libraries. +* At the opposite extreme, the smallest fingerprint we recommend is a 64-bit [Rabin fingerprint](https://en.wikipedia.org/wiki/Rabin_fingerprint). Below, we provide pseudo-code for this algorithm that can be easily translated into any programming language. 64-bit fingerprints should guarantee uniqueness for schema caches of up to a million entries (for such a cache, the chance of a collision is 3E-8). We don't recommend shorter fingerprints, as the chances of collisions is too great (for example, with 32-bit fingerprints, a cache with as few as 100,000 schemas has a 50% chance of having a collision). +* Between these two extremes, we recommend using the [MD5 message digest](https://en.wikipedia.org/wiki/MD5) to generate 128-bit fingerprints. These make sense only where very large numbers of schemas are being manipulated (tens of millions); otherwise, 64-bit fingerprints should be sufficient. As with SHA-256, MD5 implementations are found in most libraries today. + +These fingerprints are not meant to provide any security guarantees, even the longer SHA-256-based ones. Most Avro applications should be surrounded by security measures that prevent attackers from writing random data and otherwise interfering with the consumers of schemas. We recommend that these surrounding mechanisms be used to prevent collision and pre-image attacks (i.e., "forgery") on schema fingerprints, rather than relying on the security properties of the fingerprints themselves. + +Rabin fingerprints are [cyclic redundancy checks](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) computed using irreducible polynomials. In the style of the Appendix of [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt) (pg 10), which defines the CRC-32 algorithm, here's our definition of the 64-bit AVRO fingerprinting algorithm: +```java +long fingerprint64(byte[] buf) { + if (FP_TABLE == null) initFPTable(); + long fp = EMPTY; + for (int i = 0; i < buf.length; i++) + fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff]; + return fp; +} + +static long EMPTY = 0xc15d213aa4d7a795L; +static long[] FP_TABLE = null; + +void initFPTable() { + FP_TABLE = new long[256]; + for (int i = 0; i < 256; i++) { + long fp = i; + for (int j = 0; j < 8; j++) + fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L)); + FP_TABLE[i] = fp; + } +} +``` + +Readers interested in the mathematics behind this algorithm may want to read [Chapter 14 of the Second Edition of Hacker's Delight](https://books.google.com/books?id=XD9iAwAAQBAJ&pg=PA319). (Unlike RFC-1952 and the book chapter, we prepend a single one bit to messages. We do this because CRCs ignore leading zero bits, which can be problematic. Our code prepends a one-bit by initializing fingerprints using EMPTY, rather than initializing using zero as in RFC-1952 and the book chapter.) + +## Logical Types +A logical type is an Avro primitive or complex type with extra attributes to represent a derived type. The attribute `logicalType` must always be present for a logical type, and is a string with the name of one of the logical types listed later in this section. Other attributes may be defined for particular logical types. + +A logical type is always serialized using its underlying Avro type so that values are encoded in exactly the same way as the equivalent Avro type that does not have a `logicalType` attribute. Language implementations may choose to represent logical types with an appropriate native type, although this is not required. + +Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type. + +### Decimal + +#### Fixed precision +The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled × 10-scale_. + +A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute. + +The following attributes are supported: + +* _scale_, a JSON integer representing the scale (optional). If not specified the scale is 0. +* _precision_, a JSON integer representing the (maximum) precision of decimals stored in this type (required). +For example, the following schema represents decimal numbers with a maximum precision of 4 and a scale of 2: +```json +{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 +} +``` +Precision must be a positive integer greater than zero. If the underlying type is a _fixed_, then the precision is limited by its size. An array of length n can store at most _floor(log10(28 × n - 1 - 1))_ base-10 digits of precision. + +Scale must be zero or a positive integer less than or equal to the precision. + +For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match. + +#### Scalable precision + +As it's not always possible to fix scale and precision in advance for a decimal field, `big-decimal` is another `decimal` logical type restrict to Avro _bytes_. + +_Currently only available in C++, Java and Rust_. + +```json +{ + "type": "bytes", + "logicalType": "big-decimal" +} +``` +Here, bytes array contains two serialized properties. First part is an Avro byte arrays which is the two's-complement representation of the unscaled integer value in big-endian byte order. The second part is the scale property stored as an Avro integer. Scale must be zero or a positive integer less than or equal to the precision. Value itself needs more bytes than preceding `decimal` type, but it allows more flexibility. + +### UUID + +The `uuid` logical type represents a random generated universally unique identifier (UUID). + +A `uuid` logical type annotates an Avro `string` or `fixed` of length 16. Both the string and `fixed` byte layout have to conform with [RFC-4122](https://www.ietf.org/rfc/rfc4122.txt). + +The following schemas represent a uuid: + +```json +{ + "type": "string", + "logicalType": "uuid" +} +``` + +```json +{ + "type": "fixed", + "size": 16, + "logicalType": "uuid" +} +``` + +### Date +The `date` logical type represents a date within the calendar, with no reference to a particular time zone or time of day. + +A `date` logical type annotates an Avro `int`, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar). + +The following schema represents a date: +```json +{ + "type": "int", + "logicalType": "date" +} +``` + +### Time (millisecond precision) {#time_ms} +The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. + +A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000. + +### Time (microsecond precision) +The `time-micros` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond. + +A `time-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds after midnight, 00:00:00.000000. + +### Timestamps {#timestamps} + +The `timestamp-{millis,micros,nanos}` logical type represents an instant on the global timeline, independent of a particular time zone or calendar. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +- `timestamp-millis`: logical type annotates an Avro `long`, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000. +- `timestamp-micros`: logical type annotates an Avro `long`, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000. +- `timestamp-nanos`: logical type annotates an Avro `long`, where the long stores the number of nanoseconds from the unix epoch, 1 January 1970 00:00:00.000000000. + +Example: Given an event at noon local time (12:00) on January 1, 2000, in Helsinki where the local time was two hours east of UTC (UTC+2). The timestamp is first shifted to UTC 2000-01-01T10:00:00 and that is then converted to Avro long 946720800000 (milliseconds) and written. + +### Local Timestamps {#local_timestamp} + +The `local-timestamp-{millis,micros,nanos}` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local. + +- `local-timestamp-millis`: logical type annotates an Avro `long`, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. +- `local-timestamp-micros`: logical type annotates an Avro `long`, where the long stores the number of microseconds, from 1 January 1970 00:00:00.000000. +- `local-timestamp-nanos`: logical type annotates an Avro `long`, where the long stores the number of nanoseconds, from 1 January 1970 00:00:00.000000000. + +Example: Given an event at noon local time (12:00) on January 1, 2000, in Helsinki where the local time was two hours east of UTC (UTC+2). The timestamp is converted to Avro long 946728000000 (milliseconds) and then written. + +### Duration +The `duration` logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. + +A `duration` logical type annotates Avro `fixed` type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds. diff --git a/doc/content/en/docs/++version++/_index.md b/doc/content/en/docs/++version++/_index.md new file mode 100755 index 00000000000..c67a74d82d5 --- /dev/null +++ b/doc/content/en/docs/++version++/_index.md @@ -0,0 +1,61 @@ +--- +title: "Apache Avroâ„ĸ ++version++ Documentation" +linkTitle: "++version++" +type: docs +aliases: + - /docs/current/ +weight: -9999 +--- + + + +## Introduction + +Apache Avroâ„ĸ is a data serialization system. + +Avro provides: + +* Rich data structures. +* A compact, fast, binary data format. +* A container file, to store persistent data. +* Remote procedure call (RPC). +* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation as an optional optimization, only worth implementing for statically typed languages. + +## Schemas + +Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing. + +When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present. + +When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since both client and server both have the other's full schema, correspondence between same named fields, missing fields, extra fields, etc. can all be easily resolved. + +Avro schemas are defined with JSON . This facilitates implementation in languages that already have JSON libraries. + +## Comparison with other systems + +Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects. + +* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. +* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. +* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names. + + diff --git a/doc/content/en/docs/++version++/api-c++.md b/doc/content/en/docs/++version++/api-c++.md new file mode 100644 index 00000000000..4382750a46a --- /dev/null +++ b/doc/content/en/docs/++version++/api-c++.md @@ -0,0 +1,29 @@ +--- +title: "C++ API" +linkTitle: "C++ API" +weight: 102 +manualLink: /docs/++version++/api/cpp/html/ +--- + + + +The C++ API documentation can be found here. diff --git a/doc/content/en/docs/++version++/api-c.md b/doc/content/en/docs/++version++/api-c.md new file mode 100644 index 00000000000..79a5209e526 --- /dev/null +++ b/doc/content/en/docs/++version++/api-c.md @@ -0,0 +1,29 @@ +--- +title: "C API" +linkTitle: "C API" +weight: 101 +manualLink: /docs/++version++/api/c/ +--- + + + +The C API documentation can be found here. diff --git a/doc/content/en/docs/++version++/api-csharp.md b/doc/content/en/docs/++version++/api-csharp.md new file mode 100644 index 00000000000..cfad0d1e343 --- /dev/null +++ b/doc/content/en/docs/++version++/api-csharp.md @@ -0,0 +1,29 @@ +--- +title: "C# API" +linkTitle: "C# API" +weight: 103 +manualLink: /docs/++version++/api/csharp/html/ +--- + + + +The C# API documentation can be found here. diff --git a/doc/content/en/docs/++version++/api-java.md b/doc/content/en/docs/++version++/api-java.md new file mode 100644 index 00000000000..12d743567df --- /dev/null +++ b/doc/content/en/docs/++version++/api-java.md @@ -0,0 +1,29 @@ +--- +title: "Java API" +linkTitle: "Java API" +weight: 100 +manualLink: /docs/++version++/api/java/ +--- + + + +The Javadocs can be found here. diff --git a/doc/content/en/docs/++version++/api-py.md b/doc/content/en/docs/++version++/api-py.md new file mode 100644 index 00000000000..fb4f4ba13b5 --- /dev/null +++ b/doc/content/en/docs/++version++/api-py.md @@ -0,0 +1,29 @@ +--- +title: "Python API" +linkTitle: "Python API" +weight: 104 +manualLink: /docs/++version++/api/py/html/ +--- + + + +The Python API documentation can be found here. diff --git a/doc/content/en/docs/++version++/api-rust.md b/doc/content/en/docs/++version++/api-rust.md new file mode 100644 index 00000000000..1906379540b --- /dev/null +++ b/doc/content/en/docs/++version++/api-rust.md @@ -0,0 +1,29 @@ +--- +title: "Rust API" +linkTitle: "Rust API" +weight: 105 +manualLink: /docs/++version++/api/rust/apache_avro/ +--- + + + +The Rust API documentation can be found here. diff --git a/doc/content/en/docs/1.11.1/Getting started (Java)/_index.md b/doc/content/en/docs/1.11.1/Getting started (Java)/_index.md new file mode 100644 index 00000000000..7731dae06f9 --- /dev/null +++ b/doc/content/en/docs/1.11.1/Getting started (Java)/_index.md @@ -0,0 +1,289 @@ +--- +categories: [] +tags: ["java"] +title: "Getting Started (Java)" +linkTitle: "Getting Started (Java)" +weight: 2 +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Java. This guide only covers using Avro for data serialization; see Patrick Hunt's [Avro RPC Quick Start](https://github.com/phunt/avro-rpc-quickstart) for a good introduction to using Avro for RPC. + +## Download + +Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the [Apache Avroâ„ĸ Download]({{< relref "/project/download" >}}) page. This guide uses Avro 1.11.1, the latest version at the time of writing. For the examples in this guide, download avro-1.11.1.jar and avro-tools-1.11.1.jar. + +Alternatively, if you are using Maven, add the following dependency to your POM: + +```xml + + org.apache.avro + avro + 1.11.1 + +``` + +As well as the Avro Maven plugin (for performing code generation): + +```xml + + org.apache.avro + avro-maven-plugin + 1.11.1 + + + generate-sources + + schema + + + ${project.basedir}/src/main/avro/ + ${project.basedir}/src/main/java/ + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + +``` + +You may also build the required Avro jars from source. Building Avro is beyond the scope of this guide; see the Build Documentation page in the wiki for more information. + +## Defining a schema + +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing with code generation + +### Compiling the schema +Code generation allows us to automatically create classes based on our previously-defined schema. Once we have defined the relevant classes, there is no need to use the schema directly in our programs. We use the avro-tools jar to generate code as follows: + +```shell +java -jar /path/to/avro-tools-1.11.1.jar compile schema +``` + +This will generate the appropriate source files in a package based on the schema's namespace in the provided destination folder. For instance, to generate a User class in package example.avro from the schema defined above, run + +```shell +java -jar /path/to/avro-tools-1.11.1.jar compile schema user.avsc . +``` + +Note that if you using the Avro Maven plugin, there is no need to manually invoke the schema compiler; the plugin automatically performs code generation on any .avsc files present in the configured source directory. + +### Creating Users +Now that we've completed the code generation, let's create some Users, serialize them to a data file on disk, and then read back the file and deserialize the User objects. + +First let's create some Users and set their fields. + +```java +User user1 = new User(); +user1.setName("Alyssa"); +user1.setFavoriteNumber(256); +// Leave favorite color null + +// Alternate constructor +User user2 = new User("Ben", 7, "red"); + +// Construct via builder +User user3 = User.newBuilder() + .setName("Charlie") + .setFavoriteColor("blue") + .setFavoriteNumber(null) + .build(); +``` + +As shown in this example, Avro objects can be created either by invoking a constructor directly or by using a builder. Unlike constructors, builders will automatically set any default values specified in the schema. Additionally, builders validate the data as it set, whereas objects constructed directly will not cause an error until the object is serialized. However, using constructors directly generally offers better performance, as builders create a copy of the datastructure before it is written. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. Similarly, we set user3's favorite number to null (using a builder requires setting all fields, even if they are null). + +### Serializing +Now let's serialize our Users to disk. + +```java +// Serialize user1, user2 and user3 to disk +DatumWriter userDatumWriter = new SpecificDatumWriter(User.class); +DataFileWriter dataFileWriter = new DataFileWriter(userDatumWriter); +dataFileWriter.create(user1.getSchema(), new File("users.avro")); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.append(user3); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. The SpecificDatumWriter class is used with generated classes and extracts the schema from the specified generated type. + +Next we create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, let's deserialize the data file we just created. + +```java +// Deserialize Users from disk +DatumReader userDatumReader = new SpecificDatumReader(User.class); +DataFileReader dataFileReader = new DataFileReader(file, userDatumReader); +User user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +} +``` + +This snippet will output: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"} +``` + +Deserializing is very similar to serializing. We create a SpecificDatumReader, analogous to the SpecificDatumWriter we used in serialization, which converts in-memory serialized items into instances of our generated class, in this case User. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file and the schema provided by the reader, in this case the User class. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next we use the DataFileReader to iterate through the serialized Users and print the deserialized object to stdout. Note how we perform the iteration: we create a single User object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same User object rather than allocating a new User for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (User user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile # includes code generation via Avro Maven plugin +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain +``` + +### Beta feature: Generating faster code +In release 1.9.0, we introduced a new approach to generating code that speeds up decoding of objects by more than 10% and encoding by more than 30% (future performance enhancements are underway). To ensure a smooth introduction of this change into production systems, this feature is controlled by a feature flag, the system property org.apache.avro.specific.use_custom_coders. In this first release, this feature is off by default. To turn it on, set the system flag to true at runtime. In the sample above, for example, you could enable the fater coders as follows: + +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \ + -Dorg.apache.avro.specific.use_custom_coders=true + +Note that you do not have to recompile your Avro schema to have access to this feature. The feature is compiled and built into your code, and you turn it on and off at runtime using the feature flag. As a result, you can turn it on during testing, for example, and then off in production. Or you can turn it on in production, and quickly turn it off if something breaks. + +We encourage the Avro community to exercise this new feature early to help build confidence. (For those paying one-demand for compute resources in the cloud, it can lead to meaningful cost savings.) As confidence builds, we will turn this feature on by default, and eventually eliminate the feature flag (and the old code). + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. + +Let's go over the same example as in the previous section, but without using code generation: we'll create some users, serialize them to a data file on disk, and then read back the file and deserialize the users objects. + +### Creating users +First, we use a Parser to read our schema definition and create a Schema object. + +```java +Schema schema = new Schema.Parser().parse(new File("user.avsc")); +``` + +Using this schema, let's create some users. + +```java +GenericRecord user1 = new GenericData.Record(schema); +user1.put("name", "Alyssa"); +user1.put("favorite_number", 256); +// Leave favorite color null + +GenericRecord user2 = new GenericData.Record(schema); +user2.put("name", "Ben"); +user2.put("favorite_number", 7); +user2.put("favorite_color", "red"); +``` + +Since we're not using code generation, we use GenericRecords to represent users. GenericRecord uses the schema to verify that we only specify valid fields. If we try to set a non-existent field (e.g., user1.put("favorite_animal", "cat")), we'll get an AvroRuntimeException when we run the program. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. + +### Serializing +Now that we've created our user objects, serializing and deserializing them is almost identical to the example above which uses code generation. The main difference is that we use generic instead of specific readers and writers. + +First we'll serialize our users to a data file on disk. + +```java +// Serialize user1 and user2 to disk +File file = new File("users.avro"); +DatumWriter datumWriter = new GenericDatumWriter(schema); +DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); +dataFileWriter.create(schema, file); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. Since we are not using code generation, we create a GenericDatumWriter. It requires the schema both to determine how to write the GenericRecords and to verify that all non-nullable fields are present. + +As in the code generation example, we also create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, we'll deserialize the data file we just created. + +```java +// Deserialize users from disk +DatumReader datumReader = new GenericDatumReader(schema); +DataFileReader dataFileReader = new DataFileReader(file, datumReader); +GenericRecord user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +``` + +This outputs: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +``` + +Deserializing is very similar to serializing. We create a GenericDatumReader, analogous to the GenericDatumWriter we used in serialization, which converts in-memory serialized items into GenericRecords. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file, and the reader's schema provided to the GenericDatumReader. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next, we use the DataFileReader to iterate through the serialized users and print the deserialized object to stdout. Note how we perform the iteration: we create a single GenericRecord object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same record object rather than allocating a new GenericRecord for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (GenericRecord user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile +$ mvn -q exec:java -Dexec.mainClass=example.GenericMain +``` diff --git a/doc/content/en/docs/1.11.1/Getting started (Python)/_index.md b/doc/content/en/docs/1.11.1/Getting started (Python)/_index.md new file mode 100644 index 00000000000..26c36f0f1ec --- /dev/null +++ b/doc/content/en/docs/1.11.1/Getting started (Python)/_index.md @@ -0,0 +1,144 @@ +--- +categories: [] +tags: ["python"] +title: "Getting Started (Python)" +linkTitle: "Getting Started (Python)" +weight: 3 +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Python. This guide only covers using Avro for data serialization; see Patrick Hunt's Avro RPC Quick Start for a good introduction to using Avro for RPC. + +## Notice for Python 3 users +A package called "avro-python3" had been provided to support Python 3 previously, but the codebase was consolidated into the "avro" package and that supports both Python 2 and 3 now. The avro-python3 package will be removed in the near future, so users should use the "avro" package instead. They are mostly API compatible, but there's a few minor difference (e.g., function name capitalization, such as avro.schema.Parse vs avro.schema.parse). + +## Download +For Python, the easiest way to get started is to install it from PyPI. Python's Avro API is available over PyPi. + +```shell +$ python3 -m pip install avro +``` + +The official releases of the Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the Apache Avroâ„ĸ Releases page. This guide uses Avro 1.11.1, the latest version at the time of writing. Download and unzip avro-1.11.1.tar.gz, and install via python setup.py (this will probably require root privileges). Ensure that you can import avro from a Python prompt. + +```shell +$ tar xvf avro-1.11.1.tar.gz +$ cd avro-1.11.1 +$ python setup.py install +$ python +>>> import avro # should not raise ImportError +``` + +Alternatively, you may build the Avro Python library from source. From your the root Avro directory, run the commands + +```shell +$ cd lang/py/ +$ python3 -m pip install -e . +$ python +``` + +## Defining a schema +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item, regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. Note that the Avro Python library does not support code generation. + +Try running the following code snippet, which serializes two users to a data file on disk, and then reads back and deserializes the data file: + +```python +import avro.schema +from avro.datafile import DataFileReader, DataFileWriter +from avro.io import DatumReader, DatumWriter + +schema = avro.schema.parse(open("user.avsc", "rb").read()) + +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +writer.close() + +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +for user in reader: + print user +reader.close() +``` + +This outputs: + +```json +{u'favorite_color': None, u'favorite_number': 256, u'name': u'Alyssa'} +{u'favorite_color': u'red', u'favorite_number': 7, u'name': u'Ben'} +``` + +Do make sure that you open your files in binary mode (i.e. using the modes wb or rb respectively). Otherwise you might generate corrupt files due to automatic replacement of newline characters with the platform-specific representations. + +Let's take a closer look at what's going on here. + +```python +schema = avro.schema.parse(open("user.avsc", "rb").read()) +``` + +avro.schema.parse takes a string containing a JSON schema definition as input and outputs a avro.schema.Schema object (specifically a subclass of Schema, in this case RecordSchema). We're passing in the contents of our user.avsc schema file here. + +```python +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +``` + +We create a DataFileWriter, which we'll use to write serialized items to a data file on disk. The DataFileWriter constructor takes three arguments: + +* The file we'll serialize to +* A DatumWriter, which is responsible for actually serializing the items to Avro's binary format (DatumWriters can be used separately from DataFileWriters, e.g., to perform IPC with Avro). +* The schema we're using. The DataFileWriter needs the schema both to write the schema to the data file, and to verify that the items we write are valid items and write the appropriate fields. +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) + +We use DataFileWriter.append to add items to our data file. Avro records are represented as Python dicts. Since the field favorite_color has type ["int", "null"], we are not required to specify this field, as shown in the first append. Were we to omit the required name field, an exception would be raised. Any extra entries not corresponding to a field are present in the dict are ignored. + +```python +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +``` + +We open the file again, this time for reading back from disk. We use a DataFileReader and DatumReader analagous to the DataFileWriter and DatumWriter above. + +```python +for user in reader: + print user +``` + +The DataFileReader is an iterator that returns dicts corresponding to the serialized items. diff --git a/doc/content/en/docs/1.11.1/IDL Language/_index.md b/doc/content/en/docs/1.11.1/IDL Language/_index.md new file mode 100644 index 00000000000..c1405393779 --- /dev/null +++ b/doc/content/en/docs/1.11.1/IDL Language/_index.md @@ -0,0 +1,433 @@ +--- +title: "IDL Language" +linkTitle: "IDL Language" +weight: 201 +--- + + + +## Introduction +This document defines Avro IDL, a higher-level language for authoring Avro schemata. Before reading this document, you should have familiarity with the concepts of schemata and protocols, as well as the various primitive and complex types available in Avro. + +## Overview + +### Purpose +The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python. Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. + +### Usage +Each Avro IDL file defines a single Avro Protocol, and thus generates as its output a JSON-format Avro Protocol file with extension .avpr. + +To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr +$ head /tmp/namespaces.avpr +{ + "protocol" : "TestNamespace", + "namespace" : "avro.test.protocol", +``` +The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information. + +A Maven plugin is also provided to compile .avdl files. To use it, add something like the following to your pom.xml: +```xml + + + + org.apache.avro + avro-maven-plugin + + + + idl-protocol + + + + + + +``` + +## Defining a Protocol in Avro IDL +An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined by the following code: +```java +protocol MyProtocol { +} +``` +This is equivalent to (and generates) the following JSON protocol definition: +```json +{ +"protocol" : "MyProtocol", + "types" : [ ], + "messages" : { + } +} +``` +The namespace of the protocol may be changed using the @namespace annotation: +```java +@namespace("mynamespace") +protocol MyProtocol { +} +``` +This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, as will be described later in this document. + +Protocols in Avro IDL can contain the following items: + +* Imports of external protocol and schema files. +* Definitions of named schemata, including records, errors, enums, and fixeds. +* Definitions of RPC messages + +## Imports +Files may be imported in one of three formats: + +* An IDL file may be imported with a statement like: + + `import idl "foo.avdl";` + +* A JSON protocol file may be imported with a statement like: + + `import protocol "foo.avpr";` + +* A JSON schema file may be imported with a statement like: + + `import schema "foo.avsc";` + +Messages and types in the imported file are added to this file's protocol. + +Imported file names are resolved relative to the current IDL file. + +## Defining an Enumeration +Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. This default is only used when an incompatible symbol is read. It is not used if the enum field is missing. + +Example Writer Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE, OVAL +} +``` +Example Reader Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE +} = CIRCLE; +``` +In the above example, the reader will use the default value of `CIRCLE` whenever reading data written with the `OVAL` symbol of the writer. Also note that, unlike the JSON format, anonymous enums cannot be defined. + +## Defining a Fixed Length Field +Fixed fields are defined using the following syntax: +``` +fixed MD5(16); +``` +This example defines a fixed-length type called MD5 which contains 16 bytes. + +## Defining Records and Errors +Records are defined in Avro IDL using a syntax similar to a struct definition in C: +```java +record Employee { + string name; + boolean active = true; + long salary; +} +``` +The above example defines a record with the name “Employee” with three fields. + +To define an error, simply use the keyword _error_ instead of _record_. For example: +```java +error Kaboom { + string explanation; + int result_code = -1; +} +``` +Each field in a record or error consists of a type and a name, optional property annotations and an optional default value. + +A type reference in Avro IDL must be one of: + +* A primitive type +* A logical type +* A named schema defined prior to this usage in the same Protocol +* A complex type (array, map, or union) + +### Primitive Types +The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_. + +### Logical Types +Some of the logical types supported by Avro's JSON format are also supported by Avro IDL. The currently supported types are: + +* _decimal_ (logical type [decimal]({{< relref "../specification#decimal" >}})) +* _date_ (logical type [date]({{< relref "../specification#date" >}})) +* _time_ms_ (logical type [time-millis]({{< relref "../specification#time-millisecond-precision" >}})) +* _timestamp_ms_ (logical type [timestamp-millis]({{< relref "../specification#timestamp-millisecond-precision" >}})) + +For example: +```java +record Job { + string jobid; + date submitDate; + time_ms submitTime; + timestamp_ms finishTime; + decimal(9,2) finishRatio; +} +``` + +Logical types can also be specified via an annotation, which is useful for logical types for which a keyword does not exist: + +```java +record Job { + string jobid; + @logicalType("timestamp-micros") + long finishTime; +} +``` + +### References to Named Schemata +If a named schema has already been defined in the same Avro IDL file, it may be referenced by name as if it were a primitive type: +```java +record Card { + Suit suit; // refers to the enum Card defined above + int number; +} +``` + +### Default Values +Default values for fields may be optionally specified by using an equals sign after the field name followed by a JSON expression indicating the default value. This JSON is interpreted as described in the [spec]({{< relref "../specification#schema-record" >}}). + +### Complex Types + +#### Arrays +Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of any type t is denoted `array`. For example, an array of strings is denoted `array`, and a multidimensional array of Foo records would be `array>`. + +#### Maps +Map types are written similarly to array types. An array that contains values of type t is written `map`. As in the JSON schema format, all maps contain `string`-type keys. + +#### Unions +Union types are denoted as `union { typeA, typeB, typeC, ... }`. For example, this record contains a string field that is optional (unioned with null), and a field containing either a precise or a imprecise number: +```java +record RecordWithUnion { + union { null, string } optionalString; + union { decimal(12, 6), float } number; +} +``` +Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a record may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. + +Because it occurs so often, there is a special shorthand to denote a union of `null` with another type. In the following snippet, the first three fields have identical types: + +```java +record RecordWithUnion { + union { null, string } optionalString1 = null; + string? optionalString2 = null; + string? optionalString3; // No default value + string? optionalString4 = "something"; +} +``` + +Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So in the example above, all fields are valid. + +## Defining RPC Messages +The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message add which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: +```java +int add(int foo, int bar = 0); +``` +Message arguments, like record fields, may specify default values. + +To define a message with no response, you may use the alias _void_, equivalent to the Avro _null_ type: +```java +void logMessage(string message); +``` +If you have previously defined an error type within the same protocol, you may declare that a message can throw this error using the syntax: +```java +void goKaboom() throws Kaboom; +``` +To define a one-way message, use the keyword `oneway` after the parameter list, for example: +```java +void fireAndForget(string message) oneway; +``` + +## Other Language Features + +### Comments +All Java-style comments are supported within a Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines. + +Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment. + +### Escaping Identifiers +Occasionally, one will need to use a reserved language keyword as an identifier. In order to do so, backticks (`) may be used to escape the identifier. For example, to define a message with the literal name error, you may write: +```java +void `error`(); +``` +This syntax is allowed anywhere an identifier is expected. + +### Annotations for Ordering and Namespaces +Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. + +For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows: +```java +record MyRecord { + string @order("ascending") myAscendingSortField; + string @order("descending") myDescendingField; + string @order("ignore") myIgnoredField; +} +``` +A field's type (with the exception of type references) may also be preceded by annotations, e.g.: +```java +record MyRecord { + @java-class("java.util.ArrayList") array myStrings; +} +``` +This can be used to support java classes that can be serialized/deserialized via their `toString`/`String constructor`, e.g.: +```java +record MyRecord { + @java-class("java.math.BigDecimal") string value; + @java-key-class("java.io.File") map fileStates; + array<@java-class("java.math.BigDecimal") string> weights; +} +``` +Similarly, a `@namespace` annotation may be used to modify the namespace when defining a named schema. For example: +```java +@namespace("org.apache.avro.firstNamespace") +protocol MyProto { + @namespace("org.apache.avro.someOtherNamespace") + record Foo {} + + record Bar {} +} +``` +will define a protocol in the _firstNamespace_ namespace. The record _Foo_ will be defined in _someOtherNamespace_ and _Bar_ will be defined in _firstNamespace_ as it inherits its default from its container. + +Type and field aliases are specified with the `@aliases` annotation as follows: +```java +@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"]) +record MyRecord { + string @aliases(["oldField", "ancientField"]) myNewField; +} +``` +Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. + +## Complete Example +The following is an example of an Avro IDL file that shows most of the above features: +```java +/* +* Header with license information. +*/ + +/** + * An example protocol in Avro IDL + */ +@namespace("org.apache.avro.test") +protocol Simple { + /** Documentation for the enum type Kind */ + @aliases(["org.foo.KindOf"]) + enum Kind { + FOO, + BAR, // the bar enum value + BAZ + } = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. + + /** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ + fixed MD5(16); + + record TestRecord { + /** Record name; has no intrinsic order */ + string @order("ignore") name; + + Kind @order("descending") kind; + + MD5 hash; + + /* + Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. + */ + union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + + array arrayOfLongs; + } + + /** Errors are records that can be thrown from a method */ + error TestError { + string message; + } + + string hello(string greeting); + /** Return what was given. Demonstrates the use of backticks to name types/fields/messages/parameters after keywords */ + TestRecord echo(TestRecord `record`); + int add(int arg1, int arg2); + bytes echoBytes(bytes data); + void `error`() throws TestError; + // The oneway keyword forces the method to return null. + void ping() oneway; +} +``` +Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory. + +## IDE support + +There are several editors and IDEs that support Avro IDL files, usually via plugins. + +### JetBrains + +Apache Avro IDL Schema Support 203.1.2 was released in 9 December 2021. + +Features: +* Syntax Highlighting +* Code Completion +* Code Formatting +* Error Highlighting +* Inspections & quick fixes +* JSON schemas for .avpr and .avsc files + +It's available via the [JetBrains Marketplace](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support) +and on [GitHub](https://github.com/opwvhk/avro-schema-support). + +The plugin supports almost the all JetBrains products: IntelliJ IDEA, PyCharm, WebStorm, Android Studio, AppCode, GoLand, Rider, CLion, RubyMine, PhpStorm, DataGrip, DataSpell, MPS, Code With Me Guest and JetBrains Client. + +Only JetBrains Gateway does not support this plugin directly. But the backend (JetBrains) IDE that it connects to does. + +### Eclipse + +Avroclipse 0.0.11 was released on 4 December 2019. + +Features: +* Syntax Highlighting +* Error Highlighting +* Code Completion + +It is available on the [Eclipse Marketplace](https://marketplace.eclipse.org/content/avroclipse) +and [GitHub](https://github.com/dvdkruk/avroclipse). + +### Visual Studio Code + +avro-idl 0.5.0 was released on 16 June 2021. It provides syntax highlighting. + +It is available on the [VisualStudio Marketplace](https://marketplace.visualstudio.com/items?itemName=streetsidesoftware.avro) +and [GitHub](https://github.com/Jason3S/vscode-avro-ext) + +### Atom.io + +atom-language-avro 0.0.13 was released on 14 August 2015. It provides syntax highlighting. + +It is available as [Atom.io package](https://atom.io/packages/atom-language-avro) +and [GitHub](https://github.com/jonesetc/atom-language-avro) + +### Vim + +A `.avdl` detecting plugin by Gurpreet Atwal on [GitHub](https://github.com/gurpreetatwal/vim-avro) (Last change in December 2016) + +[avro-idl.vim](https://github.com/apache/avro/blob/master/share/editors/avro-idl.vim) in the Avro repository `share/editors` directory (last change in September 2010) + +Both provide syntax highlighting. diff --git a/doc/content/en/docs/1.11.1/MapReduce guide/_index.md b/doc/content/en/docs/1.11.1/MapReduce guide/_index.md new file mode 100644 index 00000000000..e51def02142 --- /dev/null +++ b/doc/content/en/docs/1.11.1/MapReduce guide/_index.md @@ -0,0 +1,396 @@ +--- +title: "MapReduce guide" +linkTitle: "MapReduce guide" +weight: 200 +--- + + + +Avro provides a convenient way to represent complex data structures within a Hadoop MapReduce job. Avro data can be used as both input to and output from a MapReduce job, as well as the intermediate format. The example in this guide uses Avro data for all three, but it's possible to mix and match; for instance, MapReduce can be used to aggregate a particular field in an Avro record. + +This guide assumes basic familiarity with both Hadoop MapReduce and Avro. See the [Hadoop documentation](https://hadoop.apache.org/docs/current/) and the [Avro getting started guide](./getting-started-java/) for introductions to these projects. This guide uses the old MapReduce API (`org.apache.hadoop.mapred`) and the new MapReduce API (`org.apache.hadoop.mapreduce`). + +## Setup +The code from this guide is included in the Avro docs under examples/mr-example. The example is set up as a Maven project that includes the necessary Avro and MapReduce dependencies and the Avro Maven plugin for code generation, so no external jars are needed to run the example. In particular, the POM includes the following dependencies: +```xml + + org.apache.avro + avro + 1.11.1 + + + org.apache.avro + avro-mapred + 1.11.1 + + + org.apache.hadoop + hadoop-client + 3.1.2 + +``` +And the following plugin: +```xml + + org.apache.avro + avro-maven-plugin + 1.11.1 + + + generate-sources + + schema + + + ${project.basedir}/../ + ${project.basedir}/target/generated-sources/ + + + + +``` + +If you do not configure the *sourceDirectory* and *outputDirectory* properties, the defaults will be used. The *sourceDirectory* property defaults to *src/main/avro*. The *outputDirectory* property defaults to *target/generated-sources*. You can change the paths to match your project layout. + +Alternatively, Avro jars can be downloaded directly from the Apache Avroâ„ĸ Releases [page](https://avro.apache.org/releases.html). The relevant Avro jars for this guide are *avro-1.11.1.jar* and *avro-mapred-1.11.1.jar*, as well as *avro-tools-1.11.1.jar* for code generation and viewing Avro data files as JSON. In addition, you will need to install Hadoop in order to use MapReduce. + +## Example: ColorCount +Below is a simple example of a MapReduce that uses Avro. There is an example for both the old (org.apache.hadoop.mapred) and new (org.apache.hadoop.mapreduce) APIs under *examples/mr-example/src/main/java/example/*. _MapredColorCount_ is the example for the older mapred API while _MapReduceColorCount_ is the example for the newer mapreduce API. Both examples are below, but we will detail the mapred API in our subsequent examples. + +MapredColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.*; +import org.apache.avro.Schema.Type; +import org.apache.avro.mapred.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.*; + +import example.avro.User; + +public class MapredColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } + } + + public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapredColorCount "); + return -1; + } + + JobConf conf = new JobConf(getConf(), MapredColorCount.class); + conf.setJobName("colorcount"); + + FileInputFormat.setInputPaths(conf, new Path(args[0])); + FileOutputFormat.setOutputPath(conf, new Path(args[1])); + + AvroJob.setMapperClass(conf, ColorCountMapper.class); + AvroJob.setReducerClass(conf, ColorCountReducer.class); + + // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set + // relevant config options such as input/output format, map output + // classes, and output key class. + AvroJob.setInputSchema(conf, User.getClassSchema()); + AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); + + JobClient.runJob(conf); + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args); + System.exit(res); + } +} +``` + +MapReduceColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.avro.mapreduce.AvroJob; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.avro.mapreduce.AvroKeyValueOutputFormat; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import example.avro.User; + +public class MapReduceColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } + + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapReduceColorCount "); + return -1; + } + + Job job = new Job(getConf()); + job.setJarByClass(MapReduceColorCount.class); + job.setJobName("Color Count"); + + FileInputFormat.setInputPaths(job, new Path(args[0])); + FileOutputFormat.setOutputPath(job, new Path(args[1])); + + job.setInputFormatClass(AvroKeyInputFormat.class); + job.setMapperClass(ColorCountMapper.class); + AvroJob.setInputKeySchema(job, User.getClassSchema()); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + job.setOutputFormatClass(AvroKeyValueOutputFormat.class); + job.setReducerClass(ColorCountReducer.class); + AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); + AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); + + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new MapReduceColorCount(), args); + System.exit(res); + } +} +``` +ColorCount reads in data files containing *User* records, defined in _examples/user.avsc_, and counts the number of instances of each favorite color. (This example draws inspiration from the canonical _WordCount_ MapReduce application.) This example uses the old MapReduce API. See MapReduceAvroWordCount, found under _doc/examples/mr-example/src/main/java/example/_ to see the new MapReduce API example. The User schema is defined as follows: +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` +This schema is compiled into the *User* class used by *ColorCount* via the Avro Maven plugin (see _examples/mr-example/pom.xml_ for how this is set up). + +*ColorCountMapper* essentially takes a *User* as input and extracts the User's favorite color, emitting the key-value pair ``. _ColorCountReducer_ then adds up how many occurrences of a particular favorite color were emitted, and outputs the result as a Pair record. These Pairs are serialized to an Avro data file. + +## Running ColorCount +The _ColorCount_ application is provided as a Maven project in the Avro docs under _examples/mr-example_. To build the project, including the code generation of the User schema, run: +```shell +mvn compile +``` +Next, run _GenerateData_ from `examples/mr-examples` to create an Avro data file, `input/users.avro`, containing 20 Users with favorite colors chosen randomly from a list: +```shell +mvn exec:java -q -Dexec.mainClass=example.GenerateData +``` +Besides creating the data file, GenerateData prints the JSON representations of the Users generated to stdout, for example: +```json +{"name": "user", "favorite_number": null, "favorite_color": "red"} +{"name": "user", "favorite_number": null, "favorite_color": "green"} +{"name": "user", "favorite_number": null, "favorite_color": "purple"} +{"name": "user", "favorite_number": null, "favorite_color": null} +... +``` +Now we're ready to run ColorCount. We specify our freshly-generated input folder as the input path and output as our output folder (note that MapReduce will not start a job if the output folder already exists): +```shell +mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output" +``` +Once ColorCount completes, checking the contents of the new output directory should yield the following: +```shell +$ ls output/ +part-00000.avro _SUCCESS +``` +You can check the contents of the generated Avro file using the avro-tools jar: +```shell +$ java -jar /path/to/avro-tools-1.11.1.jar tojson output/part-00000.avro +{"value": 3, "key": "blue"} +{"value": 7, "key": "green"} +{"value": 1, "key": "none"} +{"value": 2, "key": "orange"} +{"value": 3, "key": "purple"} +{"value": 2, "key": "red"} +{"value": 2, "key": "yellow"} +``` +Now let's go over the ColorCount example in detail. + +## AvroMapper - org.apache.hadoop.mapred API + +The easiest way to use Avro data files as input to a MapReduce job is to subclass `AvroMapper`. An `AvroMapper` defines a `map` function that takes an Avro datum as input and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountMapper is an AvroMapper that takes a User as input and outputs a `Pair>`, where the CharSequence key is the user's favorite color and the Integer value is 1. +```java +public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } +} +``` +In order to use our AvroMapper, we must call AvroJob.setMapperClass and AvroJob.setInputSchema. +```java +AvroJob.setMapperClass(conf, ColorCountMapper.class); +AvroJob.setInputSchema(conf, User.getClassSchema()); +``` +Note that `AvroMapper` does not implement the `Mapper` interface. Under the hood, the specified Avro data files are deserialized into AvroWrappers containing the actual data, which are processed by a Mapper that calls the configured AvroMapper's map function. AvroJob.setInputSchema sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setMapperClass`, `JobConf.setInputFormat`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Mapper - org.apache.hadoop.mapreduce API +This document will not go into all the differences between the mapred and mapreduce APIs, however will describe the main differences. As you can see, ColorCountMapper is now a subclass of the Hadoop Mapper class and is passed an AvroKey as it's key. Additionally, the AvroJob method calls were slightly changed. +```java + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } +``` + +## AvroReducer - org.apache.hadoop.mapred API +Analogously to AvroMapper, an AvroReducer defines a reducer function that takes the key/value types output by an AvroMapper (or any mapper that outputs Pairs) and outputs a key/value pair represented a Pair record. In the ColorCount example, ColorCountReducer is an AvroReducer that takes the CharSequence key representing a favorite color and the `Iterable` representing the counts for that color (they should all be 1 in this example) and adds up the counts. +```java +public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } +} +``` +In order to use our AvroReducer, we must call AvroJob.setReducerClass and AvroJob.setOutputSchema. +```java +AvroJob.setReducerClass(conf, ColorCountReducer.class); +AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); +``` +Note that _AvroReducer_ does not implement the _Reducer_ interface. The intermediate Pairs output by the mapper are split into _AvroKeys_ and _AvroValues_, which are processed by a Reducer that calls the configured AvroReducer's `reduce` function. `AvroJob.setOutputSchema` sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setReducerClass`, `JobConf.setOutputFormat`, `JobConf.setOutputKeyClass`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Reduce - org.apache.hadoop.mapreduce API +As before we not detail every difference between the APIs. As with the _Mapper_ change _ColorCountReducer_ is now a subclass of _Reducer_ and _AvroKey_ and _AvroValue_ are emitted. Additionally, the _AvroJob_ method calls were slightly changed. +```java + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } +``` + +## Learning more +The mapred API allows users to mix Avro AvroMappers and AvroReducers with non-Avro Mappers and Reducers and the mapreduce API allows users input Avro and output non-Avro or vice versa. + +The mapred package has API org.apache.avro.mapred documentation as does the `org.apache.avro.mapreduce` package. MapReduce API (`org.apache.hadoop.mapreduce`). Similarily to the mapreduce package, it's possible with the mapred API to implement your own Mappers and Reducers directly using the public classes provided in these libraries. See the `AvroWordCount` application, found under _examples/mr-example/src/main/java/example/AvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the old MapReduce API. See the `MapReduceAvroWordCount` application, found under _examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the new MapReduce API. diff --git a/doc/content/en/docs/1.11.1/SASL profile/_index.md b/doc/content/en/docs/1.11.1/SASL profile/_index.md new file mode 100644 index 00000000000..67c316e221c --- /dev/null +++ b/doc/content/en/docs/1.11.1/SASL profile/_index.md @@ -0,0 +1,93 @@ +--- +title: "SASL profile" +linkTitle: "SASL profile" +weight: 202 +--- + + + +## Introduction +SASL ([RFC 2222](https://www.ietf.org/rfc/rfc2222.txt)) provides a framework for authentication and security of network protocols. Each protocol that uses SASL is meant to define a SASL profile. This document provides a SASL profile for connection-based Avro RPC. + +## Overview +SASL negotiation proceeds as a series of message interactions over a connection between a client and server using a selected SASL mechanism. The client starts this negotiation by sending its chosen mechanism name with an initial (possibly empty) message. Negotiation proceeds with the exchange of messages until either side indicates success or failure. The content of the messages is mechanism-specific. If the negotiation succeeds, then the session can proceed over the connection, otherwise it must be abandoned. + +Some mechanisms continue to process session data after negotiation (e.g., encrypting it), while some specify that further session data is transmitted unmodifed. + +## Negotiation + +### Commands +Avro SASL negotiation uses four one-byte commands. + +* 0: START Used in a client's initial message. +* 1: CONTINUE Used while negotiation is ongoing. +* 2: FAIL Terminates negotiation unsuccessfully. +* 3: COMPLETE Terminates negotiation sucessfully. + +The format of a START message is: + +`| 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |` + +The format of a CONTINUE message is: + +`| 1 | 4-byte payload length | payload data |` + +The format of a FAIL message is: + +`| 2 | 4-byte message length | UTF-8 message |` + +The format of a COMPLETE message is: + +`| 3 | 4-byte payload length | payload data |` + +### Process +Negotiation is initiated by a client sending a START command containing the client's chosen mechanism name and any mechanism-specific payload data. + +The server and client then interchange some number (possibly zero) of CONTINUE messages. Each message contains payload data that is processed by the security mechanism to generate the next message. + +Once either the client or server send a FAIL message then negotiation has failed. UTF-8-encoded text is included in the failure message. Once either a FAIL message has been sent or received, or any other error occurs in the negotiation, further communication on this connection must cease. + +Once either the client or server send a COMPLETE message then negotiation has completed successfully. Session data may now be transmitted over the connection until it is closed by either side. + +## Session Data +If no SASL QOP (quality of protection) is negotiated, then all subsequent writes to/reads over this connection are written/read unmodified. In particular, messages use Avro [framing](#Message+Framing), and are of the form: + +`| 4-byte frame length | frame data | ... | 4 zero bytes |` + +If a SASL QOP is negotiated, then it must be used by the connection for all subsequent messages. This is done by wrapping each non-empty frame written using the security mechanism and unwrapping each non-empty frame read. The length written in each non-empty frame is the length of the wrapped data. Complete frames must be passed to the security mechanism for unwrapping. Unwrapped data is then passed to the application as the content of the frame. + +If at any point processing fails due to wrapping, unwrapping or framing errors, then all further communication on this connection must cease. + +## Anonymous Mechanism +The SASL anonymous mechanism ([RFC 2245](https://www.ietf.org/rfc/rfc2222.txt)) is quite simple to implement. In particular, an initial anonymous request may be prefixed by the following static sequence: + +`| 0 | 0009 | ANONYMOUS | 0000 |` + +If a server uses the anonymous mechanism, it should check that the mechanism name in the start message prefixing the first request received is 'ANONYMOUS', then simply prefix its initial response with a COMPLETE message of: + +`| 3 | 0000 |` + +If an anonymous server recieves some other mechanism name, then it may respond with a FAIL message as simple as: + +`| 2 | 0000 |` + +Note that the anonymous mechanism need add no additional round-trip messages between client and server. The START message can be piggybacked on the initial request and the COMPLETE or FAIL message can be piggybacked on the initial response. diff --git a/doc/content/en/docs/1.11.1/Specification/_index.md b/doc/content/en/docs/1.11.1/Specification/_index.md new file mode 100755 index 00000000000..49bdaf77f17 --- /dev/null +++ b/doc/content/en/docs/1.11.1/Specification/_index.md @@ -0,0 +1,838 @@ +--- +title: "Specification" +linkTitle: "Specification" +weight: 4 +date: 2021-10-25 +aliases: +- spec.html +--- + + + +## Introduction +This document defines Apache Avro. It is intended to be the authoritative specification. Implementations of Avro must adhere to this document. + +## Schema Declaration {#schema-declaration} +A Schema is represented in [JSON](https://www.json.org/) by one of: + +* A JSON string, naming a defined type. +* A JSON object, of the form: +```json +{"type": "typeName" ...attributes...} +``` +where _typeName_ is either a primitive or derived type name, as defined below. Attributes not defined in this document are permitted as metadata, but must not affect the format of serialized data. +* A JSON array, representing a union of embedded types. + +## Primitive Types +The set of primitive type names is: + +* _null_: no value +* _boolean_: a binary value +* _int_: 32-bit signed integer +* _long_: 64-bit signed integer +* _float_: single precision (32-bit) IEEE 754 floating-point number +* _double_: double precision (64-bit) IEEE 754 floating-point number +* _bytes_: sequence of 8-bit unsigned bytes +* _string_: unicode character sequence + +Primitive types have no specified attributes. + +Primitive type names are also defined type names. Thus, for example, the schema "string" is equivalent to: +```json +{"type": "string"} +``` + +## Complex Types +Avro supports six kinds of complex types: _records_, _enums_, _arrays_, _maps_, _unions_ and _fixed_. + +### Records {#schema-record} +Records use the type name "record" and support the following attributes: + +* _name_: a JSON string providing the name of the record (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _aliases_: a JSON array of strings, providing alternate names for this record (optional). +* _fields_: a JSON array, listing fields (required). Each field is a JSON object with the following attributes: + * _name_: a JSON string providing the name of the field (required), and + * _doc_: a JSON string describing this field for users (optional). + * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above + * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. + +*field default values* + +| **avro type** | **json type** | **example** | +|---------------|----------------|-------------| +| null | null | `null` | +| boolean | boolean | `true` | +| int,long | integer | `1` | +| float,double | number | `1.1` | +| bytes | string | `"\u00FF"` | +| string | string | `"foo"` | +| record | object | `{"a": 1}` | +| enum | string | `"FOO"` | +| array | array | `[1]` | +| map | object | `{"a": 1}` | +| fixed | string | `"\u00ff"` | + + * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. + * _aliases_: a JSON array of strings, providing alternate names for this field (optional). + +For example, a linked-list of 64-bit values may be defined with: +```json +{ + "type": "record", + "name": "LongList", + "aliases": ["LinkedLongs"], // old name for this + "fields" : [ + {"name": "value", "type": "long"}, // each element has a long + {"name": "next", "type": ["null", "LongList"]} // optional next element + ] +} +``` + +### Enums +Enums use the type name "enum" and support the following attributes: + +* _name_: a JSON string providing the name of the enum (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _symbols_: a JSON array, listing symbols, as JSON strings (required). All symbols in an enum must be unique; duplicates are prohibited. Every symbol must match the regular expression [A-Za-z_][A-Za-z0-9_]* (the same requirement as for [names]({{< ref "#names" >}} "Names")). +* _default_: A default value for this enumeration, used during resolution when the reader encounters a symbol from the writer that isn't defined in the reader's schema (optional). The value provided here must be a JSON string that's a member of the symbols array. See documentation on schema resolution for how this gets used. + +For example, playing card suits might be defined with: +```json +{ + "type": "enum", + "name": "Suit", + "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"] +} +``` + +### Arrays +Arrays use the type name "array" and support a single attribute: + +* _items_: the schema of the array's items. + +For example, an array of strings is declared with: +```json +{ + "type": "array", + "items" : "string", + "default": [] +} +``` + +### Maps +Maps use the type name "map" and support one attribute: + +* _values_: the schema of the map's values. + +Map keys are assumed to be strings. + +For example, a map from string to long is declared with: +```json +{ + "type": "map", + "values" : "long", + "default": {} +} +``` + +### Unions +Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string. + +(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match the first element of the union. Thus, for unions containing "null", the "null" is usually listed first, since the default value of such unions is typically null.) + +Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.) + +Unions may not immediately contain other unions. + +### Fixed +Fixed uses the type name "fixed" and supports the following attributes: + +* _name_: a string naming this fixed (required). +* _namespace_, a string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _size_: an integer, specifying the number of bytes per value (required). + +For example, 16-byte quantity may be declared with: +```json +{"type": "fixed", "size": 16, "name": "md5"} +``` + +### Names {#names} +Record, enums and fixed are named types. Each has a fullname that is composed of two parts; a name and a namespace, separated by a dot. Equality of names is defined on the fullname. + +Record fields and enum symbols have names as well (but no namespace). Equality of fields and enum symbols is defined on the name of the field/symbol within its scope (the record/enum that defines it). Fields and enum symbols across scopes are never equal. + +The name portion of the fullname of named types, record field names, and enum symbols must: + +* start with [A-Za-z_] +* subsequently contain only [A-Za-z0-9_] + +A namespace is a dot-separated sequence of such names. The empty string may also be used as a namespace to indicate the null namespace. Equality of names (including field names and enum symbols) as well as fullnames is case-sensitive. + +The null namespace may not be used in a dot-separated sequence of names. So the grammar for a namespace is: +``` + | [()*] +``` + +In record, enum and fixed definitions, the fullname is determined according to the algorithm below the example: + +``` +{ + "type": "record", + "name": "Example", + "doc": "A simple name (attribute) and no namespace attribute: use the null namespace (\"\"); the fullname is 'Example'.", + "fields": [ + { + "name": "inheritNull", + "type": { + "type": "enum", + "name": "Simple", + "doc": "A simple name (attribute) and no namespace attribute: inherit the null namespace of the enclosing type 'Example'. The fullname is 'Simple'.", + "symbols": ["a", "b"] + } + }, { + "name": "explicitNamespace", + "type": { + "type": "fixed", + "name": "Simple", + "namespace": "explicit", + "doc": "A simple name (attribute) and a namespace (attribute); the fullname is 'explicit.Simple' (this is a different type than of the 'inheritNull' field).", + "size": 12 + } + }, { + "name": "fullName", + "type": { + "type": "record", + "name": "a.full.Name", + "namespace": "ignored", + "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", + "fields": [ + { + "name": "inheritNamespace", + "type": { + "type": "enum", + "name": "Understanding", + "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.", + "symbols": ["d", "e"] + } + } + ] + } + } + ] +} +``` + +The fullname of a record, enum or fixed definition is determined by the required `name` and optional `namespace` attributes like this: + +* A fullname is specified. If the name specified contains a dot, then it is assumed to be a fullname, and any namespace also specified is ignored. For example, use "name": "org.foo.X" to indicate the fullname org.foo.X. +* A simple name (a name that contains no dots) and namespace are both specified. For example, one might use "name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X. +* A simple name only is specified (a name that contains no dots). In this case the namespace is taken from the most tightly enclosing named schema or protocol, and the fullname is constructed from that namespace and the name. For example, if "name": "X" is specified, and this occurs within a field of the record definition of org.foo.Y, then the fullname is org.foo.X. This also happens if there is no enclosing namespace (i.e., the enclosing schema definition has the null namespace). + +References to previously defined names are as in the latter two cases above: if they contain a dot they are a fullname, if they do not contain a dot, the namespace is the namespace of the enclosing definition. + +Primitive type names (`null`, `boolean`, `int`, `long`, `float`, `double`, `bytes`, `string`) have no namespace and their names may not be defined in any namespace. + +Complex types (`record`, `enum`, `array`, `map`, `fixed`) have no namespace, but their names (as well as `union`) are permitted to be reused as type names. This can be confusing to the human reader, but is always unambiguous for binary serialization. Due to the limitations of JSON encoding, it is a best practice to use a namespace when using these names. + +A schema or protocol may not contain multiple definitions of a fullname. Further, a name must be defined before it is used ("before" in the depth-first, left-to-right traversal of the JSON parse tree, where the types attribute of a protocol is always deemed to come "before" the messages attribute.) + +### Aliases +Named types and fields may have aliases. An implementation may optionally use aliases to map a writer's schema to the reader's. This facilitates both schema evolution as well as processing disparate datasets. + +Aliases function by re-writing the writer's schema using aliases from the reader's schema. For example, if the writer's schema was named "Foo" and the reader's schema is named "Bar" and has an alias of "Foo", then the implementation would act as though "Foo" were named "Bar" when reading. Similarly, if data was written as a record with a field named "x" and is read as a record with a field named "y" with alias "x", then the implementation would act as though "x" were named "y" when reading. + +A type alias may be specified either as a fully namespace-qualified, or relative to the namespace of the name it is an alias for. For example, if a type named "a.b" has aliases of "c" and "x.y", then the fully qualified names of its aliases are "a.c" and "x.y". + +## Data Serialization and Deserialization +Binary encoded Avro data does not include type information or field names. The benefit is that the serialized data is small, but as a result a schema must always be used in order to read Avro data correctly. The best way to ensure that the schema is structurally identical to the one used to write the data is to use the exact same schema. + +Therefore, files or systems that store Avro data should always include the writer's schema for that data. Avro-based remote procedure call (RPC) systems must also guarantee that remote recipients of data have a copy of the schema used to write that data. In general, it is advisable that any reader of Avro data should use a schema that is the same (as defined more fully in [Parsing Canonical Form for Schemas]({{< ref "#parsing-canonical-form-for-schemas" >}} "Parsing Canonical Form for Schemas")) as the schema that was used to write the data in order to deserialize it correctly. Deserializing data into a newer schema is accomplished by specifying an additional schema, the results of which are described in [Schema Resolution]({{< ref "#schema-resolution" >}}). + +In general, both serialization and deserialization proceed as a depth-first, left-to-right traversal of the schema, serializing or deserializing primitive types as they are encountered. Therefore, it is possible, though not advisable, to read Avro data with a schema that does not have the same Parsing Canonical Form as the schema with which the data was written. In order for this to work, the serialized primitive values must be compatible, in order value by value, with the items in the deserialization schema. For example, int and long are always serialized the same way, so an int could be deserialized as a long. Since the compatibility of two schemas depends on both the data and the serialization format (eg. binary is more permissive than JSON because JSON includes field names, eg. a long that is too large will overflow an int), it is simpler and more reliable to use schemas with identical Parsing Canonical Form. + +### Encodings +Avro specifies two serialization encodings: binary and JSON. Most applications will use the binary encoding, as it is smaller and faster. But, for debugging and web-based applications, the JSON encoding may sometimes be appropriate. + +### Binary Encoding {#binary-encoding} +Binary encoding does not include field names, self-contained information about the types of individual bytes, nor field or record separators. Therefore readers are wholly reliant on the schema used when the data was encoded. + +#### Primitive Types +Primitive types are encoded in binary as follows: + +* _null_ is written as zero bytes. +* a _boolean_ is written as a single byte whose value is either 0 (false) or 1 (true). +* _int_ and _long_ values are written using [variable-length](https://lucene.apache.org/java/3_5_0/fileformats.html#VInt) [zig-zag](https://code.google.com/apis/protocolbuffers/docs/encoding.html#types) coding. Some examples: + +| *value* | *hex* | +|---|---| +| 0 | 00 | +|-1 | 01 | +| 1 | 02 | +|-2 | 03 | +| 2 | 04 | +|...|...| +|-64 | 7f | +|64 | 80 01| +|...|...| + +* a _float_ is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java's [floatToIntBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Float.html#floatToIntBits-float-) and then encoded in little-endian format. +* a _double_ is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java's [doubleToLongBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#doubleToLongBits-double-) and then encoded in little-endian format. +* _bytes_ are encoded as a long followed by that many bytes of data. +* a _string_ is encoded as a long followed by that many bytes of UTF-8 encoded character data. +For example, the three-character string "foo" would be encoded as the long value 3 (encoded as hex 06) followed by the UTF-8 encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f): +``` +06 66 6f 6f +``` + +### Complex Types +Complex types are encoded in binary as follows: + +#### Records +A record is encoded by encoding the values of its fields in the order that they are declared. In other words, a record is encoded as just the concatenation of the encodings of its fields. Field values are encoded per their schema. + +For example, the record schema +```json +{ + "type": "record", + "name": "test", + "fields" : [ + {"name": "a", "type": "long"}, + {"name": "b", "type": "string"} + ] +} +``` + +An instance of this record whose a field has value 27 (encoded as hex 36) and whose b field has value "foo" (encoded as hex bytes 06 66 6f 6f), would be encoded simply as the concatenation of these, namely the hex byte sequence: +``` +36 06 66 6f 6f +``` + +#### Enums +An enum is encoded by a int, representing the zero-based position of the symbol in the schema. + +For example, consider the enum: +```json +{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] } +``` + +This would be encoded by an int between zero and three, with zero indicating "A", and 3 indicating "D". + +#### Arrays +Arrays are encoded as a series of blocks. Each block consists of a long count value, followed by that many array items. A block with count zero indicates the end of the array. Each item is encoded per the array's item schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a long block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +For example, the array schema +```json +{"type": "array", "items": "long"} +``` +an array containing the items 3 and 27 could be encoded as the long value 2 (encoded as hex 04) followed by long values 3 and 27 (encoded as hex 06 36) terminated by zero: +``` +04 06 36 00 +``` + +The blocked representation permits one to read and write arrays larger than can be buffered in memory, since one can start writing items without knowing the full length of the array. + +#### Maps {#schema-maps} +Maps are encoded as a series of _blocks_. Each block consists of a `long` _count_ value, followed by that many key/value pairs. A block with count zero indicates the end of the map. Each item is encoded per the map's value schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a `long` block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +The blocked representation permits one to read and write maps larger than can be buffered in memory, since one can start writing items without knowing the full length of the map. + +#### Unions +A union is encoded by first writing an `int` value indicating the zero-based position within the union of the schema of its value. The value is then encoded per the indicated schema within the union. + +For example, the union schema `["null","string"]` would encode: + +* _null_ as zero (the index of "null" in the union): +`00` +* the string "a" as one (the index of "string" in the union, 1, encoded as hex 02), followed by the serialized string: +`02 02 61` +NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. In reality, we don't expect unions with 215M members + +#### Fixed +Fixed instances are encoded using the number of bytes declared in the schema. + +### JSON Encoding +Except for unions, the JSON encoding is the same as is used to encode [field default values]({{< ref "#schema-record" >}}). + +The value of a union is encoded in JSON as follows: + +* if its type is _null_, then it is encoded as a JSON _null_; +* otherwise it is encoded as a JSON object with one name/value pair whose name is the type's name and whose value is the recursively encoded value. For Avro's named types (record, fixed or enum) the user-specified name is used, for other types the type name is used. + +For example, the union schema `["null","string","Foo"]`, where Foo is a record name, would encode: + +* _null_ as _null_; +* the string "a" as `{"string": "a"}` and +* a Foo instance as `{"Foo": {...}}`, where `{...}` indicates the JSON encoding of a Foo instance. + +Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc. + +#### Single-object encoding +In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic. + +In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively. + +##### Single object encoding specification +Single Avro objects are encoded as follows: + +1. A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1). +1. The 8-byte little-endian CRC-64-AVRO [fingerprint]({{< ref "#schema-fingerprints" >}} "Schema fingerprints") of the object's schema. +1. The Avro object encoded using [Avro's binary encoding]({{< ref "#binary-encoding" >}}). + +Implementations use the 2-byte marker to determine whether a payload is Avro. This check helps avoid expensive lookups that resolve the schema from a fingerprint, when the message is not an encoded Avro payload. + +## Sort Order +Avro defines a standard sort order for data. This permits data written by one system to be efficiently sorted by another system. This can be an important optimization, as sort order comparisons are sometimes the most frequent per-object operation. Note also that Avro binary-encoded data can be efficiently ordered without deserializing it to objects. + +Data items may only be compared if they have identical schemas. Pairwise comparisons are implemented recursively with a depth-first, left-to-right traversal of the schema. The first mismatch encountered determines the order of the items. + +Two items with the same schema are compared according to the following rules. + +* _null_ data is always equal. +* _boolean_ data is ordered with false before true. +* _int_, _long_, _float_ and _double_ data is ordered by ascending numeric value. +* _bytes_ and fixed data are compared lexicographically by unsigned 8-bit values. +* _string_ data is compared lexicographically by Unicode code point. Note that since UTF-8 is used as the binary encoding for strings, sorting of bytes and string binary data is identical. +* _array_ data is compared lexicographically by element. +* _enum_ data is ordered by the symbol's position in the enum schema. For example, an enum whose symbols are `["z", "a"]` would sort "z" values before "a" values. +* _union_ data is first ordered by the branch within the union, and, within that, by the type of the branch. For example, an `["int", "string"]` union would order all int values before all string values, with the ints and strings themselves ordered as defined above. +* _record_ data is ordered lexicographically by field. If a field specifies that its order is: + * "ascending", then the order of its values is unaltered. + * "descending", then the order of its values is reversed. + * "ignore", then its values are ignored when sorting. +* _map_ data may not be compared. It is an error to attempt to compare data containing maps unless those maps are in an `"order":"ignore"` record field. + +## Object Container Files +Avro includes a simple object container file format. A file has a schema, and all objects stored in the file must be written according to that schema, using binary encoding. Objects are stored in blocks that may be compressed. Syncronization markers are used between blocks to permit efficient splitting of files for MapReduce processing. + +Files may include arbitrary user-specified metadata. + +A file consists of: + +* A file header, followed by +* one or more file data blocks. + +A file header consists of: + +* Four bytes, ASCII 'O', 'b', 'j', followed by 1. +* file metadata, including the schema. +* The 16-byte, randomly-generated sync marker for this file. + +File metadata is written as if defined by the following [map]({{< ref "#schema-maps" >}}) schema: +```json +{"type": "map", "values": "bytes"} +``` +All metadata properties that start with "avro." are reserved. The following file metadata properties are currently used: + +* **avro.schema** contains the schema of objects stored in the file, as JSON data (required). +* **avro.codec** the name of the compression codec used to compress blocks, as a string. Implementations are required to support the following codecs: "null" and "deflate". If codec is absent, it is assumed to be "null". The codecs are described with more detail below. + +A file header is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.Header", + "fields" : [ + {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, + {"name": "meta", "type": {"type": "map", "values": "bytes"}}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}}, + ] +} +``` + +A file data block consists of: + +* A long indicating the count of objects in this block. +* A long indicating the size in bytes of the serialized objects in the current block, after any codec is applied +* The serialized objects. If a codec is specified, this is compressed by that codec. +* The file's 16-byte sync marker. + +Thus, each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enable detection of corrupt blocks and help ensure data integrity. + +### Required Codecs + +_null_ + +The "null" codec simply passes through data uncompressed. + +_deflate_ + +The "deflate" codec writes the data block using the deflate algorithm as specified in [RFC 1951](https://www.isi.edu/in-notes/rfc1951.txt), and typically implemented using the zlib library. Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum. + +### Optional Codecs +_bzip2_ + +The "bzip2" codec uses the [bzip2](https://sourceware.org/bzip2/) compression library. + +_snappy_ + +The "snappy" codec uses Google's [Snappy](https://code.google.com/p/snappy/) compression library. Each compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in the block. + +_xz_ + +The "xz" codec uses the [XZ](https://tukaani.org/xz/) compression library. + +_zstandard_ + +The "zstandard" codec uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. + +### Protocol Declaration +Avro protocols describe RPC interfaces. Like schemas, they are defined with JSON text. + +A protocol is a JSON object with the following attributes: + +* _protocol_, a string, the name of the protocol (required); +* _namespace_, an optional string that qualifies the name (optional); +* _doc_, an optional string describing this protocol; +* _types_, an optional list of definitions of named types (records, enums, fixed and errors). An error definition is just like a record definition except it uses "error" instead of "record". Note that forward references to named types are not permitted. +* _messages_, an optional JSON object whose keys are message names and whose values are objects whose attributes are described below. No two messages may have the same name. + +The name and namespace qualification rules defined for schema objects apply to protocols as well. + +### Messages +A message has attributes: + +* a _doc_, an optional description of the message, +* a _request_, a list of named, typed parameter schemas (this has the same form as the fields of a record declaration); +* a _response_ schema; +* an optional union of declared error schemas. The effective union has "string" prepended to the declared union, to permit transmission of undeclared "system" errors. For example, if the declared error union is `["AccessError"]`, then the effective union is `["string", "AccessError"]`. When no errors are declared, the effective error union is `["string"]`. Errors are serialized using the effective union; however, a protocol's JSON declaration contains only the declared union. +* an optional one-way boolean parameter. + +A request parameter list is processed equivalently to an anonymous record. Since record field lists may vary between reader and writer, request parameters may also differ between the caller and responder, and such differences are resolved in the same manner as record field differences. + +The one-way parameter may only be true when the response type is `"null"` and no errors are listed. + +### Sample Protocol +For example, one may define a simple HelloWorld protocol with: +```json +{ + "namespace": "com.acme", + "protocol": "HelloWorld", + "doc": "Protocol Greetings", + + "types": [ + {"name": "Greeting", "type": "record", "fields": [ + {"name": "message", "type": "string"}]}, + {"name": "Curse", "type": "error", "fields": [ + {"name": "message", "type": "string"}]} + ], + + "messages": { + "hello": { + "doc": "Say hello.", + "request": [{"name": "greeting", "type": "Greeting" }], + "response": "Greeting", + "errors": ["Curse"] + } + } +} +``` + +## Protocol Wire Format + +### Message Transport +Messages may be transmitted via different transport mechanisms. + +To the transport, a _message_ is an opaque byte sequence. + +A transport is a system that supports: + +* **transmission of request messages** +* **receipt of corresponding response messages** +Servers may send a response message back to the client corresponding to a request message. The mechanism of correspondance is transport-specific. For example, in HTTP it is implicit, since HTTP directly supports requests and responses. But a transport that multiplexes many client threads over a single socket would need to tag messages with unique identifiers. + +Transports may be either stateless or stateful. In a stateless transport, messaging assumes no established connection state, while stateful transports establish connections that may be used for multiple messages. This distinction is discussed further in the [handshake](#handshake) section below. + +#### HTTP as Transport +When [HTTP](https://www.w3.org/Protocols/rfc2616/rfc2616.html) is used as a transport, each Avro message exchange is an HTTP request/response pair. All messages of an Avro protocol should share a single URL at an HTTP server. Other protocols may also use that URL. Both normal and error Avro response messages should use the 200 (OK) response code. The chunked encoding may be used for requests and responses, but, regardless the Avro request and response are the entire content of an HTTP request and response. The HTTP Content-Type of requests and responses should be specified as "avro/binary". Requests should be made using the POST method. + +HTTP is used by Avro as a stateless transport. + +### Message Framing +Avro messages are _framed_ as a list of buffers. + +Framing is a layer between messages and the transport. It exists to optimize certain operations. + +The format of framed message data is: + +* a series of buffers, where each buffer consists of: + * a four-byte, big-endian _buffer length_, followed by + * that many bytes of _buffer_ data. +* a message is always terminated by a zero-length buffer. + +Framing is transparent to request and response message formats (described below). Any message may be presented as a single or multiple buffers. + +Framing can permit readers to more efficiently get different buffers from different sources and for writers to more efficiently store different buffers to different destinations. In particular, it can reduce the number of times large binary objects are copied. For example, if an RPC parameter consists of a megabyte of file data, that data can be copied directly to a socket from a file descriptor, and, on the other end, it could be written directly to a file descriptor, never entering user space. + +A simple, recommended, framing policy is for writers to create a new segment whenever a single binary object is written that is larger than a normal output buffer. Small objects are then appended in buffers, while larger objects are written as their own buffers. When a reader then tries to read a large object the runtime can hand it an entire buffer directly, without having to copy it. + +### Handshake +The purpose of the handshake is to ensure that the client and the server have each other's protocol definition, so that the client can correctly deserialize responses, and the server can correctly deserialize requests. Both clients and servers should maintain a cache of recently seen protocols, so that, in most cases, a handshake will be completed without extra round-trip network exchanges or the transmission of full protocol text. + +RPC requests and responses may not be processed until a handshake has been completed. With a stateless transport, all requests and responses are prefixed by handshakes. With a stateful transport, handshakes are only attached to requests and responses until a successful handshake response has been returned over a connection. After this, request and response payloads are sent without handshakes for the lifetime of that connection. + +The handshake process uses the following record schemas: +```json +{ + "type": "record", + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +{ + "type": "record", + "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", + "type": {"type": "enum", "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", + "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +``` + +* A client first prefixes each request with a `HandshakeRequest` containing just the hash of its protocol and of the server's protocol (`clientHash!=null, clientProtocol=null, serverHash!=null`), where the hashes are 128-bit MD5 hashes of the JSON protocol text. If a client has never connected to a given server, it sends its hash as a guess of the server's hash, otherwise it sends the hash that it previously obtained from this server. +The server responds with a HandshakeResponse containing one of: + * `match=BOTH, serverProtocol=null, serverHash=null` if the client sent the valid hash of the server's protocol and the server knows what protocol corresponds to the client's hash. In this case, the request is complete and the response data immediately follows the HandshakeResponse. + * `match=CLIENT, serverProtocol!=null, serverHash!=null` if the server has previously seen the client's protocol, but the client sent an incorrect hash of the server's protocol. The request is complete and the response data immediately follows the HandshakeResponse. The client must use the returned protocol to process the response and should also cache that protocol and its hash for future interactions with this server. + * `match=NONE` if the server has not previously seen the client's protocol. The serverHash and serverProtocol may also be non-null if the server's protocol hash was incorrect. +In this case the client must then re-submit its request with its protocol text (`clientHash!=null, clientProtocol!=null, serverHash!=null`) and the server should respond with a successful match (match=BOTH, serverProtocol=null, serverHash=null) as above. + +The meta field is reserved for future handshake enhancements. + +### Call Format +A _call_ consists of a request message paired with its resulting response or error message. Requests and responses contain extensible metadata, and both kinds of messages are framed as described above. + +The format of a call request is: + +* _request metadata_, a map with values of type bytes +* the _message name_, an Avro string, followed by +* the _message parameters_. Parameters are serialized according to the message's request declaration. +When the empty string is used as a message name a server should ignore the parameters and return an empty response. A client may use this to ping a server or to perform a handshake without sending a protocol message. + +When a message is declared one-way and a stateful connection has been established by a successful handshake response, no response data is sent. Otherwise the format of the call response is: + +* _response metadata_, a map with values of type bytes +* a one-byte error _flag_ boolean, followed by either: + * if the error flag is false, the message _response_, serialized per the message's response schema. + * if the error flag is true, the _error_, serialized per the message's effective error union schema. + +### Schema Resolution {#schema-resolution} +A reader of Avro data, whether from an RPC or a file, can always parse that data because the original schema must be provided along with the data. However, the reader may be programmed to read data into a different schema. For example, if the data was written with a different version of the software than it is read, then fields may have been added or removed from records. This section specifies how such schema differences should be resolved. + +We refer to the schema used to write the data as the writer's schema, and the schema that the application expects the reader's schema. Differences between these should be resolved as follows: + +* It is an error if the two schemas do not _match_. +To match, one of the following must hold: + * both schemas are arrays whose item types match + * both schemas are maps whose value types match + * both schemas are enums whose (unqualified) names match + * both schemas are fixed whose sizes and (unqualified) names match + * both schemas are records with the same (unqualified) name + * either schema is a union + * both schemas have same primitive type + * the writer's schema may be promoted to the reader's as follows: + * int is promotable to long, float, or double + * long is promotable to float or double + * float is promotable to double + * string is promotable to bytes + * bytes is promotable to string +* **if both are records**: + * the ordering of fields may be different: fields are matched by name. + * schemas for fields with the same name in both records are resolved recursively. + * if the writer's record contains a field with a name not present in the reader's record, the writer's value for that field is ignored. + * if the reader's record schema has a field that contains a default value, and writer's schema does not have a field with the same name, then the reader should use the default value from its field. + * if the reader's record schema has a field with no default value, and writer's schema does not have a field with the same name, an error is signalled. +* **if both are enums**: +if the writer's symbol is not present in the reader's enum and the reader has a default value, then that value is used, otherwise an error is signalled. + +* **if both are arrays**: +This resolution algorithm is applied recursively to the reader's and writer's array item schemas. + +* **if both are maps**: +This resolution algorithm is applied recursively to the reader's and writer's value schemas. + +* **if both are unions**: +The first schema in the reader's union that matches the selected writer's union schema is recursively resolved against it. if none match, an error is signalled. + +* **if reader's is a union, but writer's is not** +The first schema in the reader's union that matches the writer's schema is recursively resolved against it. If none match, an error is signalled. + +* **if writer's is a union, but reader's is not** +If the reader's schema matches the selected writer's schema, it is recursively resolved against it. If they do not match, an error is signalled. + +A schema's _doc_ fields are ignored for the purposes of schema resolution. Hence, the _doc_ portion of a schema may be dropped at serialization. + +### Parsing Canonical Form for Schemas {#parsing-canonical-form-for-schemas} +One of the defining characteristics of Avro is that a reader must use the schema used by the writer of the data in order to know how to read the data. This assumption results in a data format that's compact and also amenable to many forms of schema evolution. However, the specification so far has not defined what it means for the reader to have the "same" schema as the writer. Does the schema need to be textually identical? Well, clearly adding or removing some whitespace to a JSON expression does not change its meaning. At the same time, reordering the fields of records clearly does change the meaning. So what does it mean for a reader to have "the same" schema as a writer? + +Parsing Canonical Form is a transformation of a writer's schema that let's us define what it means for two schemas to be "the same" for the purpose of reading data written against the schema. It is called Parsing Canonical Form because the transformations strip away parts of the schema, like "doc" attributes, that are irrelevant to readers trying to parse incoming data. It is called Canonical Form because the transformations normalize the JSON text (such as the order of attributes) in a way that eliminates unimportant differences between schemas. If the Parsing Canonical Forms of two different schemas are textually equal, then those schemas are "the same" as far as any reader is concerned, i.e., there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writing using the other original schema. (We sketch a proof of this property in a companion document.) + +The next subsection specifies the transformations that define Parsing Canonical Form. But with a well-defined canonical form, it can be convenient to go one step further, transforming these canonical forms into simple integers ("fingerprints") that can be used to uniquely identify schemas. The subsection after next recommends some standard practices for generating such fingerprints. + +#### Transforming into Parsing Canonical Form +Assuming an input schema (in JSON form) that's already UTF-8 text for a _valid_ Avro schema (including all quotes as required by JSON), the following transformations will produce its Parsing Canonical Form: + +* [PRIMITIVES] Convert primitive schemas to their simple form (e.g., int instead of `{"type":"int"}`). +* [FULLNAMES] Replace short names with fullnames, using applicable namespaces to do so. Then eliminate namespace attributes, which are now redundant. +* [STRIP] Keep only attributes that are relevant to parsing data, which are: _type_, _name_, _fields_, _symbols_, _items_, _values_, _size_. Strip all others (e.g., _doc_ and _aliases_). +* [ORDER] Order the appearance of fields of JSON objects as follows: _name_, _type_, _fields_, _symbols_, _items_, _values_, _size_. For example, if an object has _type_, _name_, and _size_ fields, then the _name_ field should appear first, followed by the _type_ and then the _size_ fields. +* [STRINGS] For all JSON string literals in the schema text, replace any escaped characters (e.g., \uXXXX escapes) with their UTF-8 equivalents. +* [INTEGERS] Eliminate quotes around and any leading zeros in front of JSON integer literals (which appear in the _size_ attributes of _fixed_ schemas). +* [WHITESPACE] Eliminate all whitespace in JSON outside of string literals. + +#### Schema Fingerprints {#schema-fingerprints} +"[A] fingerprinting algorithm is a procedure that maps an arbitrarily large data item (such as a computer file) to a much shorter bit string, its fingerprint, that uniquely identifies the original data for all practical purposes" (quoted from [Wikipedia](https://en.wikipedia.org/wiki/Fingerprint_(computing))). In the Avro context, fingerprints of Parsing Canonical Form can be useful in a number of applications; for example, to cache encoder and decoder objects, to tag data items with a short substitute for the writer's full schema, and to quickly negotiate common-case schemas between readers and writers. + +In designing fingerprinting algorithms, there is a fundamental trade-off between the length of the fingerprint and the probability of collisions. To help application designers find appropriate points within this trade-off space, while encouraging interoperability and ease of implementation, we recommend using one of the following three algorithms when fingerprinting Avro schemas: + +* When applications can tolerate longer fingerprints, we recommend using the [SHA-256 digest algorithm](https://en.wikipedia.org/wiki/SHA-2) to generate 256-bit fingerprints of Parsing Canonical Forms. Most languages today have SHA-256 implementations in their libraries. +* At the opposite extreme, the smallest fingerprint we recommend is a 64-bit [Rabin fingerprint](https://en.wikipedia.org/wiki/Rabin_fingerprint). Below, we provide pseudo-code for this algorithm that can be easily translated into any programming language. 64-bit fingerprints should guarantee uniqueness for schema caches of up to a million entries (for such a cache, the chance of a collision is 3E-8). We don't recommend shorter fingerprints, as the chances of collisions is too great (for example, with 32-bit fingerprints, a cache with as few as 100,000 schemas has a 50% chance of having a collision). +* Between these two extremes, we recommend using the [MD5 message digest](https://en.wikipedia.org/wiki/MD5) to generate 128-bit fingerprints. These make sense only where very large numbers of schemas are being manipulated (tens of millions); otherwise, 64-bit fingerprints should be sufficient. As with SHA-256, MD5 implementations are found in most libraries today. + +These fingerprints are not meant to provide any security guarantees, even the longer SHA-256-based ones. Most Avro applications should be surrounded by security measures that prevent attackers from writing random data and otherwise interfering with the consumers of schemas. We recommend that these surrounding mechanisms be used to prevent collision and pre-image attacks (i.e., "forgery") on schema fingerprints, rather than relying on the security properties of the fingerprints themselves. + +Rabin fingerprints are [cyclic redundancy checks](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) computed using irreducible polynomials. In the style of the Appendix of [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt) (pg 10), which defines the CRC-32 algorithm, here's our definition of the 64-bit AVRO fingerprinting algorithm: +```java +long fingerprint64(byte[] buf) { + if (FP_TABLE == null) initFPTable(); + long fp = EMPTY; + for (int i = 0; i < buf.length; i++) + fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff]; + return fp; +} + +static long EMPTY = 0xc15d213aa4d7a795L; +static long[] FP_TABLE = null; + +void initFPTable() { + FP_TABLE = new long[256]; + for (int i = 0; i < 256; i++) { + long fp = i; + for (int j = 0; j < 8; j++) + fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L)); + FP_TABLE[i] = fp; + } +} +``` + +Readers interested in the mathematics behind this algorithm may want to read [Chapter 14 of the Second Edition of Hacker's Delight](https://books.google.com/books?id=XD9iAwAAQBAJ&pg=PA319). (Unlike RFC-1952 and the book chapter, we prepend a single one bit to messages. We do this because CRCs ignore leading zero bits, which can be problematic. Our code prepends a one-bit by initializing fingerprints using EMPTY, rather than initializing using zero as in RFC-1952 and the book chapter.) + +## Logical Types +A logical type is an Avro primitive or complex type with extra attributes to represent a derived type. The attribute `logicalType` must always be present for a logical type, and is a string with the name of one of the logical types listed later in this section. Other attributes may be defined for particular logical types. + +A logical type is always serialized using its underlying Avro type so that values are encoded in exactly the same way as the equivalent Avro type that does not have a `logicalType` attribute. Language implementations may choose to represent logical types with an appropriate native type, although this is not required. + +Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type. + +### Decimal +The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled × 10-scale_. + +A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute. + +The following attributes are supported: + +* _scale_, a JSON integer representing the scale (optional). If not specified the scale is 0. +* _precision_, a JSON integer representing the (maximum) precision of decimals stored in this type (required). +For example, the following schema represents decimal numbers with a maximum precision of 4 and a scale of 2: +```json +{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 +} +``` +Precision must be a positive integer greater than zero. If the underlying type is a _fixed_, then the precision is limited by its size. An array of length n can store at most _floor(log10(28 × n - 1 - 1))_ base-10 digits of precision. + +Scale must be zero or a positive integer less than or equal to the precision. + +For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match. + +### UUID +The `uuid` logical type represents a random generated universally unique identifier (UUID). + +A `uuid` logical type annotates an Avro `string`. The string has to conform with [RFC-4122](https://www.ietf.org/rfc/rfc4122.txt) + +### Date +The `date` logical type represents a date within the calendar, with no reference to a particular time zone or time of day. + +A `date` logical type annotates an Avro `int`, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar). + +The following schema represents a date: +```json +{ + "type": "int", + "logicalType": "date" +} +``` + +### Time (millisecond precision) +The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. + +A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000. + +### Time (microsecond precision) +The `time-micros` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond. + +A `time-micros` logical type annotates an Avro long, where the long stores the number of microseconds after midnight, 00:00:00.000000. + +### Timestamp (millisecond precision) +The `timestamp-millis` logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond. Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +A `timestamp-millis` logical type annotates an Avro long, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC. + +### Timestamp (microsecond precision) +The `timestamp-micros` logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one microsecond. Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +A `timestamp-micros` logical type annotates an Avro long, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC. + +### Local timestamp (millisecond precision) +The `local-timestamp-millis` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one millisecond. + +A `local-timestamp-millis` logical type annotates an Avro long, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. + +### Local timestamp (microsecond precision) +The `local-timestamp-micros` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one microsecond. + +A `local-timestamp-micros` logical type annotates an Avro long, where the long stores the number of microseconds, from 1 January 1970 00:00:00.000000. + +### Duration +The `duration` logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. + +A `duration` logical type annotates Avro `fixed` type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds. diff --git a/doc/content/en/docs/1.11.1/_index.md b/doc/content/en/docs/1.11.1/_index.md new file mode 100755 index 00000000000..2600f21bc17 --- /dev/null +++ b/doc/content/en/docs/1.11.1/_index.md @@ -0,0 +1,59 @@ +--- +title: "Apache Avroâ„ĸ 1.11.1 Documentation" +linkTitle: "1.11.1" +type: docs +weight: -1111 +--- + + + +## Introduction + +Apache Avroâ„ĸ is a data serialization system. + +Avro provides: + +* Rich data structures. +* A compact, fast, binary data format. +* A container file, to store persistent data. +* Remote procedure call (RPC). +* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation as an optional optimization, only worth implementing for statically typed languages. + +## Schemas + +Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing. + +When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present. + +When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since both client and server both have the other's full schema, correspondence between same named fields, missing fields, extra fields, etc. can all be easily resolved. + +Avro schemas are defined with JSON . This facilitates implementation in languages that already have JSON libraries. + +## Comparison with other systems + +Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects. + +* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. +* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. +* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names. + + diff --git a/doc/content/en/docs/1.11.1/api-c++.md b/doc/content/en/docs/1.11.1/api-c++.md new file mode 100644 index 00000000000..3740ca43758 --- /dev/null +++ b/doc/content/en/docs/1.11.1/api-c++.md @@ -0,0 +1,29 @@ +--- +title: "C++ API" +linkTitle: "C++ API" +weight: 102 +manualLink: /docs/1.11.1/api/cpp/html/ +--- + + + +The C++ API documentation can be found here. diff --git a/doc/content/en/docs/1.11.1/api-c.md b/doc/content/en/docs/1.11.1/api-c.md new file mode 100644 index 00000000000..847761b81a2 --- /dev/null +++ b/doc/content/en/docs/1.11.1/api-c.md @@ -0,0 +1,29 @@ +--- +title: "C API" +linkTitle: "C API" +weight: 101 +manualLink: /docs/1.11.1/api/c/ +--- + + + +The C API documentation can be found here. diff --git a/doc/content/en/docs/1.11.1/api-csharp.md b/doc/content/en/docs/1.11.1/api-csharp.md new file mode 100644 index 00000000000..789ebd8cd55 --- /dev/null +++ b/doc/content/en/docs/1.11.1/api-csharp.md @@ -0,0 +1,29 @@ +--- +title: "C# API" +linkTitle: "C# API" +weight: 103 +manualLink: /docs/1.11.1/api/csharp/html/ +--- + + + +The C# API documentation can be found here. diff --git a/doc/content/en/docs/1.11.1/api-java.md b/doc/content/en/docs/1.11.1/api-java.md new file mode 100644 index 00000000000..b06def2615e --- /dev/null +++ b/doc/content/en/docs/1.11.1/api-java.md @@ -0,0 +1,29 @@ +--- +title: "Java API" +linkTitle: "Java API" +weight: 100 +manualLink: /docs/1.11.1/api/java/ +--- + + + +The Javadocs can be found here. diff --git a/doc/content/en/docs/1.11.1/logo.svg b/doc/content/en/docs/1.11.1/logo.svg new file mode 100644 index 00000000000..b44ed197262 --- /dev/null +++ b/doc/content/en/docs/1.11.1/logo.svg @@ -0,0 +1,22 @@ + + + diff --git a/doc/content/en/docs/1.11.2/Getting started (Java)/_index.md b/doc/content/en/docs/1.11.2/Getting started (Java)/_index.md new file mode 100644 index 00000000000..20a680b1d88 --- /dev/null +++ b/doc/content/en/docs/1.11.2/Getting started (Java)/_index.md @@ -0,0 +1,289 @@ +--- +categories: [] +tags: ["java"] +title: "Getting Started (Java)" +linkTitle: "Getting Started (Java)" +weight: 2 +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Java. This guide only covers using Avro for data serialization; see Patrick Hunt's [Avro RPC Quick Start](https://github.com/phunt/avro-rpc-quickstart) for a good introduction to using Avro for RPC. + +## Download + +Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the [Apache Avroâ„ĸ Download]({{< relref "/project/download" >}}) page. This guide uses Avro 1.11.2, the latest version at the time of writing. For the examples in this guide, download avro-1.11.2.jar and avro-tools-1.11.2.jar. + +Alternatively, if you are using Maven, add the following dependency to your POM: + +```xml + + org.apache.avro + avro + 1.11.2 + +``` + +As well as the Avro Maven plugin (for performing code generation): + +```xml + + org.apache.avro + avro-maven-plugin + 1.11.2 + + + generate-sources + + schema + + + ${project.basedir}/src/main/avro/ + ${project.basedir}/src/main/java/ + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + +``` + +You may also build the required Avro jars from source. Building Avro is beyond the scope of this guide; see the Build Documentation page in the wiki for more information. + +## Defining a schema + +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing with code generation + +### Compiling the schema +Code generation allows us to automatically create classes based on our previously-defined schema. Once we have defined the relevant classes, there is no need to use the schema directly in our programs. We use the avro-tools jar to generate code as follows: + +```shell +java -jar /path/to/avro-tools-1.11.2.jar compile schema +``` + +This will generate the appropriate source files in a package based on the schema's namespace in the provided destination folder. For instance, to generate a User class in package example.avro from the schema defined above, run + +```shell +java -jar /path/to/avro-tools-1.11.2.jar compile schema user.avsc . +``` + +Note that if you using the Avro Maven plugin, there is no need to manually invoke the schema compiler; the plugin automatically performs code generation on any .avsc files present in the configured source directory. + +### Creating Users +Now that we've completed the code generation, let's create some Users, serialize them to a data file on disk, and then read back the file and deserialize the User objects. + +First let's create some Users and set their fields. + +```java +User user1 = new User(); +user1.setName("Alyssa"); +user1.setFavoriteNumber(256); +// Leave favorite color null + +// Alternate constructor +User user2 = new User("Ben", 7, "red"); + +// Construct via builder +User user3 = User.newBuilder() + .setName("Charlie") + .setFavoriteColor("blue") + .setFavoriteNumber(null) + .build(); +``` + +As shown in this example, Avro objects can be created either by invoking a constructor directly or by using a builder. Unlike constructors, builders will automatically set any default values specified in the schema. Additionally, builders validate the data as it set, whereas objects constructed directly will not cause an error until the object is serialized. However, using constructors directly generally offers better performance, as builders create a copy of the datastructure before it is written. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. Similarly, we set user3's favorite number to null (using a builder requires setting all fields, even if they are null). + +### Serializing +Now let's serialize our Users to disk. + +```java +// Serialize user1, user2 and user3 to disk +DatumWriter userDatumWriter = new SpecificDatumWriter(User.class); +DataFileWriter dataFileWriter = new DataFileWriter(userDatumWriter); +dataFileWriter.create(user1.getSchema(), new File("users.avro")); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.append(user3); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. The SpecificDatumWriter class is used with generated classes and extracts the schema from the specified generated type. + +Next we create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, let's deserialize the data file we just created. + +```java +// Deserialize Users from disk +DatumReader userDatumReader = new SpecificDatumReader(User.class); +DataFileReader dataFileReader = new DataFileReader(file, userDatumReader); +User user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +} +``` + +This snippet will output: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"} +``` + +Deserializing is very similar to serializing. We create a SpecificDatumReader, analogous to the SpecificDatumWriter we used in serialization, which converts in-memory serialized items into instances of our generated class, in this case User. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file and the schema provided by the reader, in this case the User class. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next we use the DataFileReader to iterate through the serialized Users and print the deserialized object to stdout. Note how we perform the iteration: we create a single User object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same User object rather than allocating a new User for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (User user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile # includes code generation via Avro Maven plugin +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain +``` + +### Beta feature: Generating faster code +In release 1.9.0, we introduced a new approach to generating code that speeds up decoding of objects by more than 10% and encoding by more than 30% (future performance enhancements are underway). To ensure a smooth introduction of this change into production systems, this feature is controlled by a feature flag, the system property org.apache.avro.specific.use_custom_coders. In this first release, this feature is off by default. To turn it on, set the system flag to true at runtime. In the sample above, for example, you could enable the fater coders as follows: + +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \ + -Dorg.apache.avro.specific.use_custom_coders=true + +Note that you do not have to recompile your Avro schema to have access to this feature. The feature is compiled and built into your code, and you turn it on and off at runtime using the feature flag. As a result, you can turn it on during testing, for example, and then off in production. Or you can turn it on in production, and quickly turn it off if something breaks. + +We encourage the Avro community to exercise this new feature early to help build confidence. (For those paying one-demand for compute resources in the cloud, it can lead to meaningful cost savings.) As confidence builds, we will turn this feature on by default, and eventually eliminate the feature flag (and the old code). + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. + +Let's go over the same example as in the previous section, but without using code generation: we'll create some users, serialize them to a data file on disk, and then read back the file and deserialize the users objects. + +### Creating users +First, we use a Parser to read our schema definition and create a Schema object. + +```java +Schema schema = new Schema.Parser().parse(new File("user.avsc")); +``` + +Using this schema, let's create some users. + +```java +GenericRecord user1 = new GenericData.Record(schema); +user1.put("name", "Alyssa"); +user1.put("favorite_number", 256); +// Leave favorite color null + +GenericRecord user2 = new GenericData.Record(schema); +user2.put("name", "Ben"); +user2.put("favorite_number", 7); +user2.put("favorite_color", "red"); +``` + +Since we're not using code generation, we use GenericRecords to represent users. GenericRecord uses the schema to verify that we only specify valid fields. If we try to set a non-existent field (e.g., user1.put("favorite_animal", "cat")), we'll get an AvroRuntimeException when we run the program. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. + +### Serializing +Now that we've created our user objects, serializing and deserializing them is almost identical to the example above which uses code generation. The main difference is that we use generic instead of specific readers and writers. + +First we'll serialize our users to a data file on disk. + +```java +// Serialize user1 and user2 to disk +File file = new File("users.avro"); +DatumWriter datumWriter = new GenericDatumWriter(schema); +DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); +dataFileWriter.create(schema, file); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. Since we are not using code generation, we create a GenericDatumWriter. It requires the schema both to determine how to write the GenericRecords and to verify that all non-nullable fields are present. + +As in the code generation example, we also create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, we'll deserialize the data file we just created. + +```java +// Deserialize users from disk +DatumReader datumReader = new GenericDatumReader(schema); +DataFileReader dataFileReader = new DataFileReader(file, datumReader); +GenericRecord user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +``` + +This outputs: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +``` + +Deserializing is very similar to serializing. We create a GenericDatumReader, analogous to the GenericDatumWriter we used in serialization, which converts in-memory serialized items into GenericRecords. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file, and the reader's schema provided to the GenericDatumReader. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next, we use the DataFileReader to iterate through the serialized users and print the deserialized object to stdout. Note how we perform the iteration: we create a single GenericRecord object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same record object rather than allocating a new GenericRecord for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (GenericRecord user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile +$ mvn -q exec:java -Dexec.mainClass=example.GenericMain +``` diff --git a/doc/content/en/docs/1.11.2/Getting started (Python)/_index.md b/doc/content/en/docs/1.11.2/Getting started (Python)/_index.md new file mode 100644 index 00000000000..96ae73660ae --- /dev/null +++ b/doc/content/en/docs/1.11.2/Getting started (Python)/_index.md @@ -0,0 +1,147 @@ +--- +categories: [] +tags: ["python"] +title: "Getting Started (Python)" +linkTitle: "Getting Started (Python)" +weight: 3 +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Python. This guide only covers using Avro for data serialization; see Patrick Hunt's Avro RPC Quick Start for a good introduction to using Avro for RPC. + +## Notice for Python 3 users +A package called "avro-python3" had been provided to support Python 3 previously, but the codebase was consolidated into the "avro" package and that supports both Python 2 and 3 now. The avro-python3 package will be removed in the near future, so users should use the "avro" package instead. They are mostly API compatible, but there's a few minor difference (e.g., function name capitalization, such as avro.schema.Parse vs avro.schema.parse). + +## Download +For Python, the easiest way to get started is to install it from PyPI. Python's Avro API is available over PyPi. + +```shell +$ python3 -m pip install avro +``` + +The official releases of the Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the Apache Avroâ„ĸ Releases page. This guide uses Avro 1.11.2, the latest version at the time of writing. Download and unzip avro-1.11.2.tar.gz, and install via python setup.py (this will probably require root privileges). Ensure that you can import avro from a Python prompt. + +```shell +$ tar xvf avro-1.11.2.tar.gz +$ cd avro-1.11.2 +$ python setup.py install +$ python +>>> import avro # should not raise ImportError +``` + +Alternatively, you may build the Avro Python library from source. From your the root Avro directory, run the commands + +```shell +$ cd lang/py/ +$ python3 -m pip install -e . +$ python +``` + +## Defining a schema +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item, regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. Note that the Avro Python library does not support code generation. + +Try running the following code snippet, which serializes two users to a data file on disk, and then reads back and deserializes the data file: + +```python +import avro.schema +from avro.datafile import DataFileReader, DataFileWriter +from avro.io import DatumReader, DatumWriter + +schema = avro.schema.parse(open("user.avsc", "rb").read()) + +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +writer.close() + +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +for user in reader: + print(user) +reader.close() +``` + +This outputs: + +```json +{u'favorite_color': None, u'favorite_number': 256, u'name': u'Alyssa'} +{u'favorite_color': u'red', u'favorite_number': 7, u'name': u'Ben'} +``` + +Do make sure that you open your files in binary mode (i.e. using the modes wb or rb respectively). Otherwise you might generate corrupt files due to automatic replacement of newline characters with the platform-specific representations. + +Let's take a closer look at what's going on here. + +```python +schema = avro.schema.parse(open("user.avsc", "rb").read()) +``` + +avro.schema.parse takes a string containing a JSON schema definition as input and outputs a avro.schema.Schema object (specifically a subclass of Schema, in this case RecordSchema). We're passing in the contents of our user.avsc schema file here. + +```python +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +``` + +We create a DataFileWriter, which we'll use to write serialized items to a data file on disk. The DataFileWriter constructor takes three arguments: + +* The file we'll serialize to +* A DatumWriter, which is responsible for actually serializing the items to Avro's binary format (DatumWriters can be used separately from DataFileWriters, e.g., to perform IPC with Avro). +* The schema we're using. The DataFileWriter needs the schema both to write the schema to the data file, and to verify that the items we write are valid items and write the appropriate fields. + +```python +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +``` + +We use DataFileWriter.append to add items to our data file. Avro records are represented as Python dicts. Since the field favorite_color has type ["string", "null"], we are not required to specify this field, as shown in the first append. Were we to omit the required name field, an exception would be raised. Any extra entries not corresponding to a field are present in the dict are ignored. + +```python +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +``` + +We open the file again, this time for reading back from disk. We use a DataFileReader and DatumReader analagous to the DataFileWriter and DatumWriter above. + +```python +for user in reader: + print(user) +``` + +The DataFileReader is an iterator that returns dicts corresponding to the serialized items. diff --git a/doc/content/en/docs/1.11.2/IDL Language/_index.md b/doc/content/en/docs/1.11.2/IDL Language/_index.md new file mode 100644 index 00000000000..f50b0a489be --- /dev/null +++ b/doc/content/en/docs/1.11.2/IDL Language/_index.md @@ -0,0 +1,435 @@ +--- +title: "IDL Language" +linkTitle: "IDL Language" +weight: 201 +--- + + + +## Introduction +This document defines Avro IDL, a higher-level language for authoring Avro schemata. Before reading this document, you should have familiarity with the concepts of schemata and protocols, as well as the various primitive and complex types available in Avro. + +## Overview + +### Purpose +The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python. Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. + +### Usage +Each Avro IDL file defines a single Avro Protocol, and thus generates as its output a JSON-format Avro Protocol file with extension .avpr. + +To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr +$ head /tmp/namespaces.avpr +{ + "protocol" : "TestNamespace", + "namespace" : "avro.test.protocol", +``` +The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information. + +A Maven plugin is also provided to compile .avdl files. To use it, add something like the following to your pom.xml: +```xml + + + + org.apache.avro + avro-maven-plugin + + + + idl-protocol + + + + + + +``` + +## Defining a Protocol in Avro IDL +An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined by the following code: +```java +protocol MyProtocol { +} +``` +This is equivalent to (and generates) the following JSON protocol definition: +```json +{ +"protocol" : "MyProtocol", + "types" : [ ], + "messages" : { + } +} +``` +The namespace of the protocol may be changed using the @namespace annotation: +```java +@namespace("mynamespace") +protocol MyProtocol { +} +``` +This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, as will be described later in this document. + +Protocols in Avro IDL can contain the following items: + +* Imports of external protocol and schema files. +* Definitions of named schemata, including records, errors, enums, and fixeds. +* Definitions of RPC messages + +## Imports +Files may be imported in one of three formats: + +* An IDL file may be imported with a statement like: + + `import idl "foo.avdl";` + +* A JSON protocol file may be imported with a statement like: + + `import protocol "foo.avpr";` + +* A JSON schema file may be imported with a statement like: + + `import schema "foo.avsc";` + +Messages and types in the imported file are added to this file's protocol. + +Imported file names are resolved relative to the current IDL file. + +## Defining an Enumeration +Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. This default is only used when an incompatible symbol is read. It is not used if the enum field is missing. + +Example Writer Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE, OVAL +} +``` +Example Reader Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE +} = CIRCLE; +``` +In the above example, the reader will use the default value of `CIRCLE` whenever reading data written with the `OVAL` symbol of the writer. Also note that, unlike the JSON format, anonymous enums cannot be defined. + +## Defining a Fixed Length Field +Fixed fields are defined using the following syntax: +``` +fixed MD5(16); +``` +This example defines a fixed-length type called MD5 which contains 16 bytes. + +## Defining Records and Errors +Records are defined in Avro IDL using a syntax similar to a struct definition in C: +```java +record Employee { + string name; + boolean active = true; + long salary; +} +``` +The above example defines a record with the name “Employee” with three fields. + +To define an error, simply use the keyword _error_ instead of _record_. For example: +```java +error Kaboom { + string explanation; + int result_code = -1; +} +``` +Each field in a record or error consists of a type and a name, optional property annotations and an optional default value. + +A type reference in Avro IDL must be one of: + +* A primitive type +* A logical type +* A named schema defined prior to this usage in the same Protocol +* A complex type (array, map, or union) + +### Primitive Types +The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_. + +### Logical Types +Some of the logical types supported by Avro's JSON format are also supported by Avro IDL. The currently supported types are: + +* _decimal_ (logical type [decimal]({{< relref "../specification#decimal" >}})) +* _date_ (logical type [date]({{< relref "../specification#date" >}})) +* _time_ms_ (logical type [time-millis]({{< relref "../specification#time-millisecond-precision" >}})) +* _timestamp_ms_ (logical type [timestamp-millis]({{< relref "../specification#timestamp-millisecond-precision" >}})) +* _uuid_ (logical type [uuid]({{< relref "../specification#uuid" >}})) + +For example: +```java +record Job { + string jobid; + date submitDate; + time_ms submitTime; + timestamp_ms finishTime; + decimal(9,2) finishRatio; + uuid pk = "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"; +} +``` + +Logical types can also be specified via an annotation, which is useful for logical types for which a keyword does not exist: + +```java +record Job { + string jobid; + @logicalType("timestamp-micros") + long finishTime; +} +``` + +### References to Named Schemata +If a named schema has already been defined in the same Avro IDL file, it may be referenced by name as if it were a primitive type: +```java +record Card { + Suit suit; // refers to the enum Card defined above + int number; +} +``` + +### Default Values +Default values for fields may be optionally specified by using an equals sign after the field name followed by a JSON expression indicating the default value. This JSON is interpreted as described in the [spec]({{< relref "../specification#schema-record" >}}). + +### Complex Types + +#### Arrays +Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of any type t is denoted `array`. For example, an array of strings is denoted `array`, and a multidimensional array of Foo records would be `array>`. + +#### Maps +Map types are written similarly to array types. An array that contains values of type t is written `map`. As in the JSON schema format, all maps contain `string`-type keys. + +#### Unions +Union types are denoted as `union { typeA, typeB, typeC, ... }`. For example, this record contains a string field that is optional (unioned with null), and a field containing either a precise or a imprecise number: +```java +record RecordWithUnion { + union { null, string } optionalString; + union { decimal(12, 6), float } number; +} +``` +Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a record may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. + +Because it occurs so often, there is a special shorthand to denote a union of `null` with another type. In the following snippet, the first three fields have identical types: + +```java +record RecordWithUnion { + union { null, string } optionalString1 = null; + string? optionalString2 = null; + string? optionalString3; // No default value + string? optionalString4 = "something"; +} +``` + +Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So in the example above, all fields are valid. + +## Defining RPC Messages +The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message add which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: +```java +int add(int foo, int bar = 0); +``` +Message arguments, like record fields, may specify default values. + +To define a message with no response, you may use the alias _void_, equivalent to the Avro _null_ type: +```java +void logMessage(string message); +``` +If you have previously defined an error type within the same protocol, you may declare that a message can throw this error using the syntax: +```java +void goKaboom() throws Kaboom; +``` +To define a one-way message, use the keyword `oneway` after the parameter list, for example: +```java +void fireAndForget(string message) oneway; +``` + +## Other Language Features + +### Comments +All Java-style comments are supported within a Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines. + +Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment. + +### Escaping Identifiers +Occasionally, one will need to use a reserved language keyword as an identifier. In order to do so, backticks (`) may be used to escape the identifier. For example, to define a message with the literal name error, you may write: +```java +void `error`(); +``` +This syntax is allowed anywhere an identifier is expected. + +### Annotations for Ordering and Namespaces +Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. + +For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows: +```java +record MyRecord { + string @order("ascending") myAscendingSortField; + string @order("descending") myDescendingField; + string @order("ignore") myIgnoredField; +} +``` +A field's type (with the exception of type references) may also be preceded by annotations, e.g.: +```java +record MyRecord { + @java-class("java.util.ArrayList") array myStrings; +} +``` +This can be used to support java classes that can be serialized/deserialized via their `toString`/`String constructor`, e.g.: +```java +record MyRecord { + @java-class("java.math.BigDecimal") string value; + @java-key-class("java.io.File") map fileStates; + array<@java-class("java.math.BigDecimal") string> weights; +} +``` +Similarly, a `@namespace` annotation may be used to modify the namespace when defining a named schema. For example: +```java +@namespace("org.apache.avro.firstNamespace") +protocol MyProto { + @namespace("org.apache.avro.someOtherNamespace") + record Foo {} + + record Bar {} +} +``` +will define a protocol in the _firstNamespace_ namespace. The record _Foo_ will be defined in _someOtherNamespace_ and _Bar_ will be defined in _firstNamespace_ as it inherits its default from its container. + +Type and field aliases are specified with the `@aliases` annotation as follows: +```java +@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"]) +record MyRecord { + string @aliases(["oldField", "ancientField"]) myNewField; +} +``` +Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. + +## Complete Example +The following is an example of an Avro IDL file that shows most of the above features: +```java +/* +* Header with license information. +*/ + +/** + * An example protocol in Avro IDL + */ +@namespace("org.apache.avro.test") +protocol Simple { + /** Documentation for the enum type Kind */ + @aliases(["org.foo.KindOf"]) + enum Kind { + FOO, + BAR, // the bar enum value + BAZ + } = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. + + /** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ + fixed MD5(16); + + record TestRecord { + /** Record name; has no intrinsic order */ + string @order("ignore") name; + + Kind @order("descending") kind; + + MD5 hash; + + /* + Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. + */ + union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + + array arrayOfLongs; + } + + /** Errors are records that can be thrown from a method */ + error TestError { + string message; + } + + string hello(string greeting); + /** Return what was given. Demonstrates the use of backticks to name types/fields/messages/parameters after keywords */ + TestRecord echo(TestRecord `record`); + int add(int arg1, int arg2); + bytes echoBytes(bytes data); + void `error`() throws TestError; + // The oneway keyword forces the method to return null. + void ping() oneway; +} +``` +Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory. + +## IDE support + +There are several editors and IDEs that support Avro IDL files, usually via plugins. + +### JetBrains + +Apache Avro IDL Schema Support 203.1.2 was released in 9 December 2021. + +Features: +* Syntax Highlighting +* Code Completion +* Code Formatting +* Error Highlighting +* Inspections & quick fixes +* JSON schemas for .avpr and .avsc files + +It's available via the [JetBrains Marketplace](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support) +and on [GitHub](https://github.com/opwvhk/avro-schema-support). + +The plugin supports almost the all JetBrains products: IntelliJ IDEA, PyCharm, WebStorm, Android Studio, AppCode, GoLand, Rider, CLion, RubyMine, PhpStorm, DataGrip, DataSpell, MPS, Code With Me Guest and JetBrains Client. + +Only JetBrains Gateway does not support this plugin directly. But the backend (JetBrains) IDE that it connects to does. + +### Eclipse + +Avroclipse 0.0.11 was released on 4 December 2019. + +Features: +* Syntax Highlighting +* Error Highlighting +* Code Completion + +It is available on the [Eclipse Marketplace](https://marketplace.eclipse.org/content/avroclipse) +and [GitHub](https://github.com/dvdkruk/avroclipse). + +### Visual Studio Code + +avro-idl 0.5.0 was released on 16 June 2021. It provides syntax highlighting. + +It is available on the [VisualStudio Marketplace](https://marketplace.visualstudio.com/items?itemName=streetsidesoftware.avro) +and [GitHub](https://github.com/Jason3S/vscode-avro-ext) + +### Atom.io + +atom-language-avro 0.0.13 was released on 14 August 2015. It provides syntax highlighting. + +It is available as [Atom.io package](https://atom.io/packages/atom-language-avro) +and [GitHub](https://github.com/jonesetc/atom-language-avro) + +### Vim + +A `.avdl` detecting plugin by Gurpreet Atwal on [GitHub](https://github.com/gurpreetatwal/vim-avro) (Last change in December 2016) + +[avro-idl.vim](https://github.com/apache/avro/blob/master/share/editors/avro-idl.vim) in the Avro repository `share/editors` directory (last change in September 2010) + +Both provide syntax highlighting. diff --git a/doc/content/en/docs/1.11.2/MapReduce guide/_index.md b/doc/content/en/docs/1.11.2/MapReduce guide/_index.md new file mode 100644 index 00000000000..5e767936c18 --- /dev/null +++ b/doc/content/en/docs/1.11.2/MapReduce guide/_index.md @@ -0,0 +1,396 @@ +--- +title: "MapReduce guide" +linkTitle: "MapReduce guide" +weight: 200 +--- + + + +Avro provides a convenient way to represent complex data structures within a Hadoop MapReduce job. Avro data can be used as both input to and output from a MapReduce job, as well as the intermediate format. The example in this guide uses Avro data for all three, but it's possible to mix and match; for instance, MapReduce can be used to aggregate a particular field in an Avro record. + +This guide assumes basic familiarity with both Hadoop MapReduce and Avro. See the [Hadoop documentation](https://hadoop.apache.org/docs/current/) and the [Avro getting started guide](./getting-started-java/) for introductions to these projects. This guide uses the old MapReduce API (`org.apache.hadoop.mapred`) and the new MapReduce API (`org.apache.hadoop.mapreduce`). + +## Setup +The code from this guide is included in the Avro docs under examples/mr-example. The example is set up as a Maven project that includes the necessary Avro and MapReduce dependencies and the Avro Maven plugin for code generation, so no external jars are needed to run the example. In particular, the POM includes the following dependencies: +```xml + + org.apache.avro + avro + 1.11.2 + + + org.apache.avro + avro-mapred + 1.11.2 + + + org.apache.hadoop + hadoop-client + 3.1.2 + +``` +And the following plugin: +```xml + + org.apache.avro + avro-maven-plugin + 1.11.2 + + + generate-sources + + schema + + + ${project.basedir}/../ + ${project.basedir}/target/generated-sources/ + + + + +``` + +If you do not configure the *sourceDirectory* and *outputDirectory* properties, the defaults will be used. The *sourceDirectory* property defaults to *src/main/avro*. The *outputDirectory* property defaults to *target/generated-sources*. You can change the paths to match your project layout. + +Alternatively, Avro jars can be downloaded directly from the Apache Avroâ„ĸ Releases [page](https://avro.apache.org/releases.html). The relevant Avro jars for this guide are *avro-1.11.2.jar* and *avro-mapred-1.11.2.jar*, as well as *avro-tools-1.11.2.jar* for code generation and viewing Avro data files as JSON. In addition, you will need to install Hadoop in order to use MapReduce. + +## Example: ColorCount +Below is a simple example of a MapReduce that uses Avro. There is an example for both the old (org.apache.hadoop.mapred) and new (org.apache.hadoop.mapreduce) APIs under *examples/mr-example/src/main/java/example/*. _MapredColorCount_ is the example for the older mapred API while _MapReduceColorCount_ is the example for the newer mapreduce API. Both examples are below, but we will detail the mapred API in our subsequent examples. + +MapredColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.*; +import org.apache.avro.Schema.Type; +import org.apache.avro.mapred.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.*; + +import example.avro.User; + +public class MapredColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } + } + + public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapredColorCount "); + return -1; + } + + JobConf conf = new JobConf(getConf(), MapredColorCount.class); + conf.setJobName("colorcount"); + + FileInputFormat.setInputPaths(conf, new Path(args[0])); + FileOutputFormat.setOutputPath(conf, new Path(args[1])); + + AvroJob.setMapperClass(conf, ColorCountMapper.class); + AvroJob.setReducerClass(conf, ColorCountReducer.class); + + // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set + // relevant config options such as input/output format, map output + // classes, and output key class. + AvroJob.setInputSchema(conf, User.getClassSchema()); + AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); + + JobClient.runJob(conf); + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args); + System.exit(res); + } +} +``` + +MapReduceColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.avro.mapreduce.AvroJob; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.avro.mapreduce.AvroKeyValueOutputFormat; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import example.avro.User; + +public class MapReduceColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } + + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapReduceColorCount "); + return -1; + } + + Job job = new Job(getConf()); + job.setJarByClass(MapReduceColorCount.class); + job.setJobName("Color Count"); + + FileInputFormat.setInputPaths(job, new Path(args[0])); + FileOutputFormat.setOutputPath(job, new Path(args[1])); + + job.setInputFormatClass(AvroKeyInputFormat.class); + job.setMapperClass(ColorCountMapper.class); + AvroJob.setInputKeySchema(job, User.getClassSchema()); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + job.setOutputFormatClass(AvroKeyValueOutputFormat.class); + job.setReducerClass(ColorCountReducer.class); + AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); + AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); + + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new MapReduceColorCount(), args); + System.exit(res); + } +} +``` +ColorCount reads in data files containing *User* records, defined in _examples/user.avsc_, and counts the number of instances of each favorite color. (This example draws inspiration from the canonical _WordCount_ MapReduce application.) This example uses the old MapReduce API. See MapReduceAvroWordCount, found under _doc/examples/mr-example/src/main/java/example/_ to see the new MapReduce API example. The User schema is defined as follows: +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` +This schema is compiled into the *User* class used by *ColorCount* via the Avro Maven plugin (see _examples/mr-example/pom.xml_ for how this is set up). + +*ColorCountMapper* essentially takes a *User* as input and extracts the User's favorite color, emitting the key-value pair ``. _ColorCountReducer_ then adds up how many occurrences of a particular favorite color were emitted, and outputs the result as a Pair record. These Pairs are serialized to an Avro data file. + +## Running ColorCount +The _ColorCount_ application is provided as a Maven project in the Avro docs under _examples/mr-example_. To build the project, including the code generation of the User schema, run: +```shell +mvn compile +``` +Next, run _GenerateData_ from `examples/mr-examples` to create an Avro data file, `input/users.avro`, containing 20 Users with favorite colors chosen randomly from a list: +```shell +mvn exec:java -q -Dexec.mainClass=example.GenerateData +``` +Besides creating the data file, GenerateData prints the JSON representations of the Users generated to stdout, for example: +```json +{"name": "user", "favorite_number": null, "favorite_color": "red"} +{"name": "user", "favorite_number": null, "favorite_color": "green"} +{"name": "user", "favorite_number": null, "favorite_color": "purple"} +{"name": "user", "favorite_number": null, "favorite_color": null} +... +``` +Now we're ready to run ColorCount. We specify our freshly-generated input folder as the input path and output as our output folder (note that MapReduce will not start a job if the output folder already exists): +```shell +mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output" +``` +Once ColorCount completes, checking the contents of the new output directory should yield the following: +```shell +$ ls output/ +part-00000.avro _SUCCESS +``` +You can check the contents of the generated Avro file using the avro-tools jar: +```shell +$ java -jar /path/to/avro-tools-1.11.2.jar tojson output/part-00000.avro +{"value": 3, "key": "blue"} +{"value": 7, "key": "green"} +{"value": 1, "key": "none"} +{"value": 2, "key": "orange"} +{"value": 3, "key": "purple"} +{"value": 2, "key": "red"} +{"value": 2, "key": "yellow"} +``` +Now let's go over the ColorCount example in detail. + +## AvroMapper - org.apache.hadoop.mapred API + +The easiest way to use Avro data files as input to a MapReduce job is to subclass `AvroMapper`. An `AvroMapper` defines a `map` function that takes an Avro datum as input and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountMapper is an AvroMapper that takes a User as input and outputs a `Pair>`, where the CharSequence key is the user's favorite color and the Integer value is 1. +```java +public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } +} +``` +In order to use our AvroMapper, we must call AvroJob.setMapperClass and AvroJob.setInputSchema. +```java +AvroJob.setMapperClass(conf, ColorCountMapper.class); +AvroJob.setInputSchema(conf, User.getClassSchema()); +``` +Note that `AvroMapper` does not implement the `Mapper` interface. Under the hood, the specified Avro data files are deserialized into AvroWrappers containing the actual data, which are processed by a Mapper that calls the configured AvroMapper's map function. AvroJob.setInputSchema sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setMapperClass`, `JobConf.setInputFormat`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Mapper - org.apache.hadoop.mapreduce API +This document will not go into all the differences between the mapred and mapreduce APIs, however will describe the main differences. As you can see, ColorCountMapper is now a subclass of the Hadoop Mapper class and is passed an AvroKey as it's key. Additionally, the AvroJob method calls were slightly changed. +```java + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } +``` + +## AvroReducer - org.apache.hadoop.mapred API +Analogously to AvroMapper, an AvroReducer defines a reducer function that takes the key/value types output by an AvroMapper (or any mapper that outputs Pairs) and outputs a key/value pair represented a Pair record. In the ColorCount example, ColorCountReducer is an AvroReducer that takes the CharSequence key representing a favorite color and the `Iterable` representing the counts for that color (they should all be 1 in this example) and adds up the counts. +```java +public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } +} +``` +In order to use our AvroReducer, we must call AvroJob.setReducerClass and AvroJob.setOutputSchema. +```java +AvroJob.setReducerClass(conf, ColorCountReducer.class); +AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); +``` +Note that _AvroReducer_ does not implement the _Reducer_ interface. The intermediate Pairs output by the mapper are split into _AvroKeys_ and _AvroValues_, which are processed by a Reducer that calls the configured AvroReducer's `reduce` function. `AvroJob.setOutputSchema` sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setReducerClass`, `JobConf.setOutputFormat`, `JobConf.setOutputKeyClass`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Reduce - org.apache.hadoop.mapreduce API +As before we not detail every difference between the APIs. As with the _Mapper_ change _ColorCountReducer_ is now a subclass of _Reducer_ and _AvroKey_ and _AvroValue_ are emitted. Additionally, the _AvroJob_ method calls were slightly changed. +```java + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } +``` + +## Learning more +The mapred API allows users to mix Avro AvroMappers and AvroReducers with non-Avro Mappers and Reducers and the mapreduce API allows users input Avro and output non-Avro or vice versa. + +The mapred package has API org.apache.avro.mapred documentation as does the `org.apache.avro.mapreduce` package. MapReduce API (`org.apache.hadoop.mapreduce`). Similarily to the mapreduce package, it's possible with the mapred API to implement your own Mappers and Reducers directly using the public classes provided in these libraries. See the `AvroWordCount` application, found under _examples/mr-example/src/main/java/example/AvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the old MapReduce API. See the `MapReduceAvroWordCount` application, found under _examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the new MapReduce API. diff --git a/doc/content/en/docs/1.11.2/SASL profile/_index.md b/doc/content/en/docs/1.11.2/SASL profile/_index.md new file mode 100644 index 00000000000..67c316e221c --- /dev/null +++ b/doc/content/en/docs/1.11.2/SASL profile/_index.md @@ -0,0 +1,93 @@ +--- +title: "SASL profile" +linkTitle: "SASL profile" +weight: 202 +--- + + + +## Introduction +SASL ([RFC 2222](https://www.ietf.org/rfc/rfc2222.txt)) provides a framework for authentication and security of network protocols. Each protocol that uses SASL is meant to define a SASL profile. This document provides a SASL profile for connection-based Avro RPC. + +## Overview +SASL negotiation proceeds as a series of message interactions over a connection between a client and server using a selected SASL mechanism. The client starts this negotiation by sending its chosen mechanism name with an initial (possibly empty) message. Negotiation proceeds with the exchange of messages until either side indicates success or failure. The content of the messages is mechanism-specific. If the negotiation succeeds, then the session can proceed over the connection, otherwise it must be abandoned. + +Some mechanisms continue to process session data after negotiation (e.g., encrypting it), while some specify that further session data is transmitted unmodifed. + +## Negotiation + +### Commands +Avro SASL negotiation uses four one-byte commands. + +* 0: START Used in a client's initial message. +* 1: CONTINUE Used while negotiation is ongoing. +* 2: FAIL Terminates negotiation unsuccessfully. +* 3: COMPLETE Terminates negotiation sucessfully. + +The format of a START message is: + +`| 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |` + +The format of a CONTINUE message is: + +`| 1 | 4-byte payload length | payload data |` + +The format of a FAIL message is: + +`| 2 | 4-byte message length | UTF-8 message |` + +The format of a COMPLETE message is: + +`| 3 | 4-byte payload length | payload data |` + +### Process +Negotiation is initiated by a client sending a START command containing the client's chosen mechanism name and any mechanism-specific payload data. + +The server and client then interchange some number (possibly zero) of CONTINUE messages. Each message contains payload data that is processed by the security mechanism to generate the next message. + +Once either the client or server send a FAIL message then negotiation has failed. UTF-8-encoded text is included in the failure message. Once either a FAIL message has been sent or received, or any other error occurs in the negotiation, further communication on this connection must cease. + +Once either the client or server send a COMPLETE message then negotiation has completed successfully. Session data may now be transmitted over the connection until it is closed by either side. + +## Session Data +If no SASL QOP (quality of protection) is negotiated, then all subsequent writes to/reads over this connection are written/read unmodified. In particular, messages use Avro [framing](#Message+Framing), and are of the form: + +`| 4-byte frame length | frame data | ... | 4 zero bytes |` + +If a SASL QOP is negotiated, then it must be used by the connection for all subsequent messages. This is done by wrapping each non-empty frame written using the security mechanism and unwrapping each non-empty frame read. The length written in each non-empty frame is the length of the wrapped data. Complete frames must be passed to the security mechanism for unwrapping. Unwrapped data is then passed to the application as the content of the frame. + +If at any point processing fails due to wrapping, unwrapping or framing errors, then all further communication on this connection must cease. + +## Anonymous Mechanism +The SASL anonymous mechanism ([RFC 2245](https://www.ietf.org/rfc/rfc2222.txt)) is quite simple to implement. In particular, an initial anonymous request may be prefixed by the following static sequence: + +`| 0 | 0009 | ANONYMOUS | 0000 |` + +If a server uses the anonymous mechanism, it should check that the mechanism name in the start message prefixing the first request received is 'ANONYMOUS', then simply prefix its initial response with a COMPLETE message of: + +`| 3 | 0000 |` + +If an anonymous server recieves some other mechanism name, then it may respond with a FAIL message as simple as: + +`| 2 | 0000 |` + +Note that the anonymous mechanism need add no additional round-trip messages between client and server. The START message can be piggybacked on the initial request and the COMPLETE or FAIL message can be piggybacked on the initial response. diff --git a/doc/content/en/docs/1.11.2/Specification/_index.md b/doc/content/en/docs/1.11.2/Specification/_index.md new file mode 100755 index 00000000000..df641e2db69 --- /dev/null +++ b/doc/content/en/docs/1.11.2/Specification/_index.md @@ -0,0 +1,849 @@ +--- +title: "Specification" +linkTitle: "Specification" +weight: 4 +date: 2021-10-25 +aliases: +- spec.html +--- + + + +## Introduction +This document defines Apache Avro. It is intended to be the authoritative specification. Implementations of Avro must adhere to this document. + +## Schema Declaration {#schema-declaration} +A Schema is represented in [JSON](https://www.json.org/) by one of: + +* A JSON string, naming a defined type. +* A JSON object, of the form: +```js +{"type": "typeName", ...attributes...} +``` +where _typeName_ is either a primitive or derived type name, as defined below. Attributes not defined in this document are permitted as metadata, but must not affect the format of serialized data. +* A JSON array, representing a union of embedded types. + +## Primitive Types +The set of primitive type names is: + +* _null_: no value +* _boolean_: a binary value +* _int_: 32-bit signed integer +* _long_: 64-bit signed integer +* _float_: single precision (32-bit) IEEE 754 floating-point number +* _double_: double precision (64-bit) IEEE 754 floating-point number +* _bytes_: sequence of 8-bit unsigned bytes +* _string_: unicode character sequence + +Primitive types have no specified attributes. + +Primitive type names are also defined type names. Thus, for example, the schema "string" is equivalent to: +```json +{"type": "string"} +``` + +## Complex Types +Avro supports six kinds of complex types: _records_, _enums_, _arrays_, _maps_, _unions_ and _fixed_. + +### Records {#schema-record} +Records use the type name "record" and support the following attributes: + +* _name_: a JSON string providing the name of the record (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _aliases_: a JSON array of strings, providing alternate names for this record (optional). +* _fields_: a JSON array, listing fields (required). Each field is a JSON object with the following attributes: + * _name_: a JSON string providing the name of the field (required), and + * _doc_: a JSON string describing this field for users (optional). + * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above + * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. + +*field default values* + +| **avro type** | **json type** | **example** | +|---------------|----------------|-------------| +| null | null | `null` | +| boolean | boolean | `true` | +| int,long | integer | `1` | +| float,double | number | `1.1` | +| bytes | string | `"\u00FF"` | +| string | string | `"foo"` | +| record | object | `{"a": 1}` | +| enum | string | `"FOO"` | +| array | array | `[1]` | +| map | object | `{"a": 1}` | +| fixed | string | `"\u00ff"` | + + * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. + * _aliases_: a JSON array of strings, providing alternate names for this field (optional). + +For example, a linked-list of 64-bit values may be defined with: +```jsonc +{ + "type": "record", + "name": "LongList", + "aliases": ["LinkedLongs"], // old name for this + "fields" : [ + {"name": "value", "type": "long"}, // each element has a long + {"name": "next", "type": ["null", "LongList"]} // optional next element + ] +} +``` + +### Enums +Enums use the type name "enum" and support the following attributes: + +* _name_: a JSON string providing the name of the enum (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _symbols_: a JSON array, listing symbols, as JSON strings (required). All symbols in an enum must be unique; duplicates are prohibited. Every symbol must match the regular expression [A-Za-z_][A-Za-z0-9_]* (the same requirement as for [names]({{< ref "#names" >}} "Names")). +* _default_: A default value for this enumeration, used during resolution when the reader encounters a symbol from the writer that isn't defined in the reader's schema (optional). The value provided here must be a JSON string that's a member of the symbols array. See documentation on schema resolution for how this gets used. + +For example, playing card suits might be defined with: +```json +{ + "type": "enum", + "name": "Suit", + "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"] +} +``` + +### Arrays +Arrays use the type name "array" and support a single attribute: + +* _items_: the schema of the array's items. + +For example, an array of strings is declared with: +```json +{ + "type": "array", + "items" : "string", + "default": [] +} +``` + +### Maps +Maps use the type name "map" and support one attribute: + +* _values_: the schema of the map's values. + +Map keys are assumed to be strings. + +For example, a map from string to long is declared with: +```json +{ + "type": "map", + "values" : "long", + "default": {} +} +``` + +### Unions +Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string. + +(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match the first element of the union. Thus, for unions containing "null", the "null" is usually listed first, since the default value of such unions is typically null.) + +Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.) + +Unions may not immediately contain other unions. + +### Fixed +Fixed uses the type name "fixed" and supports the following attributes: + +* _name_: a string naming this fixed (required). +* _namespace_, a string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _size_: an integer, specifying the number of bytes per value (required). + +For example, 16-byte quantity may be declared with: +```json +{"type": "fixed", "size": 16, "name": "md5"} +``` + +### Names {#names} +Record, enums and fixed are named types. Each has a fullname that is composed of two parts; a name and a namespace, separated by a dot. Equality of names is defined on the fullname. + +Record fields and enum symbols have names as well (but no namespace). Equality of fields and enum symbols is defined on the name of the field/symbol within its scope (the record/enum that defines it). Fields and enum symbols across scopes are never equal. + +The name portion of the fullname of named types, record field names, and enum symbols must: + +* start with [A-Za-z_] +* subsequently contain only [A-Za-z0-9_] + +A namespace is a dot-separated sequence of such names. The empty string may also be used as a namespace to indicate the null namespace. Equality of names (including field names and enum symbols) as well as fullnames is case-sensitive. + +The null namespace may not be used in a dot-separated sequence of names. So the grammar for a namespace is: +``` + | [()*] +``` + +In record, enum and fixed definitions, the fullname is determined according to the algorithm below the example: + +``` +{ + "type": "record", + "name": "Example", + "doc": "A simple name (attribute) and no namespace attribute: use the null namespace (\"\"); the fullname is 'Example'.", + "fields": [ + { + "name": "inheritNull", + "type": { + "type": "enum", + "name": "Simple", + "doc": "A simple name (attribute) and no namespace attribute: inherit the null namespace of the enclosing type 'Example'. The fullname is 'Simple'.", + "symbols": ["a", "b"] + } + }, { + "name": "explicitNamespace", + "type": { + "type": "fixed", + "name": "Simple", + "namespace": "explicit", + "doc": "A simple name (attribute) and a namespace (attribute); the fullname is 'explicit.Simple' (this is a different type than of the 'inheritNull' field).", + "size": 12 + } + }, { + "name": "fullName", + "type": { + "type": "record", + "name": "a.full.Name", + "namespace": "ignored", + "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", + "fields": [ + { + "name": "inheritNamespace", + "type": { + "type": "enum", + "name": "Understanding", + "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.", + "symbols": ["d", "e"] + } + } + ] + } + } + ] +} +``` + +The fullname of a record, enum or fixed definition is determined by the required `name` and optional `namespace` attributes like this: + +* A fullname is specified. If the name specified contains a dot, then it is assumed to be a fullname, and any namespace also specified is ignored. For example, use "name": "org.foo.X" to indicate the fullname org.foo.X. +* A simple name (a name that contains no dots) and namespace are both specified. For example, one might use "name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X. +* A simple name only is specified (a name that contains no dots). In this case the namespace is taken from the most tightly enclosing named schema or protocol, and the fullname is constructed from that namespace and the name. For example, if "name": "X" is specified, and this occurs within a field of the record definition of org.foo.Y, then the fullname is org.foo.X. This also happens if there is no enclosing namespace (i.e., the enclosing schema definition has the null namespace). + +References to previously defined names are as in the latter two cases above: if they contain a dot they are a fullname, if they do not contain a dot, the namespace is the namespace of the enclosing definition. + +Primitive type names (`null`, `boolean`, `int`, `long`, `float`, `double`, `bytes`, `string`) have no namespace and their names may not be defined in any namespace. + +Complex types (`record`, `enum`, `array`, `map`, `fixed`) have no namespace, but their names (as well as `union`) are permitted to be reused as type names. This can be confusing to the human reader, but is always unambiguous for binary serialization. Due to the limitations of JSON encoding, it is a best practice to use a namespace when using these names. + +A schema or protocol may not contain multiple definitions of a fullname. Further, a name must be defined before it is used ("before" in the depth-first, left-to-right traversal of the JSON parse tree, where the types attribute of a protocol is always deemed to come "before" the messages attribute.) + +### Aliases +Named types and fields may have aliases. An implementation may optionally use aliases to map a writer's schema to the reader's. This facilitates both schema evolution as well as processing disparate datasets. + +Aliases function by re-writing the writer's schema using aliases from the reader's schema. For example, if the writer's schema was named "Foo" and the reader's schema is named "Bar" and has an alias of "Foo", then the implementation would act as though "Foo" were named "Bar" when reading. Similarly, if data was written as a record with a field named "x" and is read as a record with a field named "y" with alias "x", then the implementation would act as though "x" were named "y" when reading. + +A type alias may be specified either as a fully namespace-qualified, or relative to the namespace of the name it is an alias for. For example, if a type named "a.b" has aliases of "c" and "x.y", then the fully qualified names of its aliases are "a.c" and "x.y". + +## Data Serialization and Deserialization +Binary encoded Avro data does not include type information or field names. The benefit is that the serialized data is small, but as a result a schema must always be used in order to read Avro data correctly. The best way to ensure that the schema is structurally identical to the one used to write the data is to use the exact same schema. + +Therefore, files or systems that store Avro data should always include the writer's schema for that data. Avro-based remote procedure call (RPC) systems must also guarantee that remote recipients of data have a copy of the schema used to write that data. In general, it is advisable that any reader of Avro data should use a schema that is the same (as defined more fully in [Parsing Canonical Form for Schemas]({{< ref "#parsing-canonical-form-for-schemas" >}} "Parsing Canonical Form for Schemas")) as the schema that was used to write the data in order to deserialize it correctly. Deserializing data into a newer schema is accomplished by specifying an additional schema, the results of which are described in [Schema Resolution]({{< ref "#schema-resolution" >}}). + +In general, both serialization and deserialization proceed as a depth-first, left-to-right traversal of the schema, serializing or deserializing primitive types as they are encountered. Therefore, it is possible, though not advisable, to read Avro data with a schema that does not have the same Parsing Canonical Form as the schema with which the data was written. In order for this to work, the serialized primitive values must be compatible, in order value by value, with the items in the deserialization schema. For example, int and long are always serialized the same way, so an int could be deserialized as a long. Since the compatibility of two schemas depends on both the data and the serialization format (eg. binary is more permissive than JSON because JSON includes field names, eg. a long that is too large will overflow an int), it is simpler and more reliable to use schemas with identical Parsing Canonical Form. + +### Encodings +Avro specifies two serialization encodings: binary and JSON. Most applications will use the binary encoding, as it is smaller and faster. But, for debugging and web-based applications, the JSON encoding may sometimes be appropriate. + +### Binary Encoding {#binary-encoding} +Binary encoding does not include field names, self-contained information about the types of individual bytes, nor field or record separators. Therefore readers are wholly reliant on the schema used when the data was encoded. + +#### Primitive Types +Primitive types are encoded in binary as follows: + +* _null_ is written as zero bytes. +* a _boolean_ is written as a single byte whose value is either 0 (false) or 1 (true). +* _int_ and _long_ values are written using [variable-length](https://lucene.apache.org/java/3_5_0/fileformats.html#VInt) [zig-zag](https://code.google.com/apis/protocolbuffers/docs/encoding.html#types) coding. Some examples: + +| *value* | *hex* | +|---|---| +| 0 | 00 | +|-1 | 01 | +| 1 | 02 | +|-2 | 03 | +| 2 | 04 | +|...|...| +|-64 | 7f | +|64 | 80 01| +|...|...| + +* a _float_ is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java's [floatToIntBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Float.html#floatToIntBits-float-) and then encoded in little-endian format. +* a _double_ is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java's [doubleToLongBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#doubleToLongBits-double-) and then encoded in little-endian format. +* _bytes_ are encoded as a long followed by that many bytes of data. +* a _string_ is encoded as a long followed by that many bytes of UTF-8 encoded character data. +For example, the three-character string "foo" would be encoded as the long value 3 (encoded as hex 06) followed by the UTF-8 encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f): +``` +06 66 6f 6f +``` + +### Complex Types +Complex types are encoded in binary as follows: + +#### Records +A record is encoded by encoding the values of its fields in the order that they are declared. In other words, a record is encoded as just the concatenation of the encodings of its fields. Field values are encoded per their schema. + +For example, the record schema +```json +{ + "type": "record", + "name": "test", + "fields" : [ + {"name": "a", "type": "long"}, + {"name": "b", "type": "string"} + ] +} +``` + +An instance of this record whose a field has value 27 (encoded as hex 36) and whose b field has value "foo" (encoded as hex bytes 06 66 6f 6f), would be encoded simply as the concatenation of these, namely the hex byte sequence: +``` +36 06 66 6f 6f +``` + +#### Enums +An enum is encoded by a int, representing the zero-based position of the symbol in the schema. + +For example, consider the enum: +```json +{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] } +``` + +This would be encoded by an int between zero and three, with zero indicating "A", and 3 indicating "D". + +#### Arrays +Arrays are encoded as a series of blocks. Each block consists of a long count value, followed by that many array items. A block with count zero indicates the end of the array. Each item is encoded per the array's item schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a long block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +For example, the array schema +```json +{"type": "array", "items": "long"} +``` +an array containing the items 3 and 27 could be encoded as the long value 2 (encoded as hex 04) followed by long values 3 and 27 (encoded as hex 06 36) terminated by zero: +``` +04 06 36 00 +``` + +The blocked representation permits one to read and write arrays larger than can be buffered in memory, since one can start writing items without knowing the full length of the array. + +#### Maps {#schema-maps} +Maps are encoded as a series of _blocks_. Each block consists of a `long` _count_ value, followed by that many key/value pairs. A block with count zero indicates the end of the map. Each item is encoded per the map's value schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a `long` block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +The blocked representation permits one to read and write maps larger than can be buffered in memory, since one can start writing items without knowing the full length of the map. + +#### Unions +A union is encoded by first writing an `int` value indicating the zero-based position within the union of the schema of its value. The value is then encoded per the indicated schema within the union. + +For example, the union schema `["null","string"]` would encode: + +* _null_ as zero (the index of "null" in the union): +`00` +* the string "a" as one (the index of "string" in the union, 1, encoded as hex 02), followed by the serialized string: +`02 02 61` +NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. In reality, we don't expect unions with 215M members + +#### Fixed +Fixed instances are encoded using the number of bytes declared in the schema. + +### JSON Encoding +Except for unions, the JSON encoding is the same as is used to encode [field default values]({{< ref "#schema-record" >}}). + +The value of a union is encoded in JSON as follows: + +* if its type is _null_, then it is encoded as a JSON _null_; +* otherwise it is encoded as a JSON object with one name/value pair whose name is the type's name and whose value is the recursively encoded value. For Avro's named types (record, fixed or enum) the user-specified name is used, for other types the type name is used. + +For example, the union schema `["null","string","Foo"]`, where Foo is a record name, would encode: + +* _null_ as _null_; +* the string "a" as `{"string": "a"}` and +* a Foo instance as `{"Foo": {...}}`, where `{...}` indicates the JSON encoding of a Foo instance. + +Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc. + +#### Single-object encoding +In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic. + +In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively. + +##### Single object encoding specification +Single Avro objects are encoded as follows: + +1. A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1). +1. The 8-byte little-endian CRC-64-AVRO [fingerprint]({{< ref "#schema-fingerprints" >}} "Schema fingerprints") of the object's schema. +1. The Avro object encoded using [Avro's binary encoding]({{< ref "#binary-encoding" >}}). + +Implementations use the 2-byte marker to determine whether a payload is Avro. This check helps avoid expensive lookups that resolve the schema from a fingerprint, when the message is not an encoded Avro payload. + +## Sort Order +Avro defines a standard sort order for data. This permits data written by one system to be efficiently sorted by another system. This can be an important optimization, as sort order comparisons are sometimes the most frequent per-object operation. Note also that Avro binary-encoded data can be efficiently ordered without deserializing it to objects. + +Data items may only be compared if they have identical schemas. Pairwise comparisons are implemented recursively with a depth-first, left-to-right traversal of the schema. The first mismatch encountered determines the order of the items. + +Two items with the same schema are compared according to the following rules. + +* _null_ data is always equal. +* _boolean_ data is ordered with false before true. +* _int_, _long_, _float_ and _double_ data is ordered by ascending numeric value. +* _bytes_ and fixed data are compared lexicographically by unsigned 8-bit values. +* _string_ data is compared lexicographically by Unicode code point. Note that since UTF-8 is used as the binary encoding for strings, sorting of bytes and string binary data is identical. +* _array_ data is compared lexicographically by element. +* _enum_ data is ordered by the symbol's position in the enum schema. For example, an enum whose symbols are `["z", "a"]` would sort "z" values before "a" values. +* _union_ data is first ordered by the branch within the union, and, within that, by the type of the branch. For example, an `["int", "string"]` union would order all int values before all string values, with the ints and strings themselves ordered as defined above. +* _record_ data is ordered lexicographically by field. If a field specifies that its order is: + * "ascending", then the order of its values is unaltered. + * "descending", then the order of its values is reversed. + * "ignore", then its values are ignored when sorting. +* _map_ data may not be compared. It is an error to attempt to compare data containing maps unless those maps are in an `"order":"ignore"` record field. + +## Object Container Files +Avro includes a simple object container file format. A file has a schema, and all objects stored in the file must be written according to that schema, using binary encoding. Objects are stored in blocks that may be compressed. Syncronization markers are used between blocks to permit efficient splitting of files for MapReduce processing. + +Files may include arbitrary user-specified metadata. + +A file consists of: + +* A file header, followed by +* one or more file data blocks. + +A file header consists of: + +* Four bytes, ASCII 'O', 'b', 'j', followed by 1. +* file metadata, including the schema. +* The 16-byte, randomly-generated sync marker for this file. + +File metadata is written as if defined by the following [map]({{< ref "#schema-maps" >}}) schema: +```json +{"type": "map", "values": "bytes"} +``` +All metadata properties that start with "avro." are reserved. The following file metadata properties are currently used: + +* **avro.schema** contains the schema of objects stored in the file, as JSON data (required). +* **avro.codec** the name of the compression codec used to compress blocks, as a string. Implementations are required to support the following codecs: "null" and "deflate". If codec is absent, it is assumed to be "null". The codecs are described with more detail below. + +A file header is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.Header", + "fields" : [ + {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, + {"name": "meta", "type": {"type": "map", "values": "bytes"}}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +A file data block consists of: + +* A long indicating the count of objects in this block. +* A long indicating the size in bytes of the serialized objects in the current block, after any codec is applied +* The serialized objects. If a codec is specified, this is compressed by that codec. +* The file's 16-byte sync marker. + +A file data block is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.DataBlock", + "fields" : [ + {"name": "count", "type": "long"}, + {"name": "data", "type": "bytes"}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +Each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enable detection of corrupt blocks and help ensure data integrity. + +### Required Codecs + +_null_ + +The "null" codec simply passes through data uncompressed. + +_deflate_ + +The "deflate" codec writes the data block using the deflate algorithm as specified in [RFC 1951](https://www.isi.edu/in-notes/rfc1951.txt), and typically implemented using the zlib library. Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum. + +### Optional Codecs +_bzip2_ + +The "bzip2" codec uses the [bzip2](https://sourceware.org/bzip2/) compression library. + +_snappy_ + +The "snappy" codec uses Google's [Snappy](https://code.google.com/p/snappy/) compression library. Each compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in the block. + +_xz_ + +The "xz" codec uses the [XZ](https://tukaani.org/xz/) compression library. + +_zstandard_ + +The "zstandard" codec uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. + +### Protocol Declaration +Avro protocols describe RPC interfaces. Like schemas, they are defined with JSON text. + +A protocol is a JSON object with the following attributes: + +* _protocol_, a string, the name of the protocol (required); +* _namespace_, an optional string that qualifies the name (optional); +* _doc_, an optional string describing this protocol; +* _types_, an optional list of definitions of named types (records, enums, fixed and errors). An error definition is just like a record definition except it uses "error" instead of "record". Note that forward references to named types are not permitted. +* _messages_, an optional JSON object whose keys are message names and whose values are objects whose attributes are described below. No two messages may have the same name. + +The name and namespace qualification rules defined for schema objects apply to protocols as well. + +### Messages +A message has attributes: + +* a _doc_, an optional description of the message, +* a _request_, a list of named, typed parameter schemas (this has the same form as the fields of a record declaration); +* a _response_ schema; +* an optional union of declared error schemas. The effective union has "string" prepended to the declared union, to permit transmission of undeclared "system" errors. For example, if the declared error union is `["AccessError"]`, then the effective union is `["string", "AccessError"]`. When no errors are declared, the effective error union is `["string"]`. Errors are serialized using the effective union; however, a protocol's JSON declaration contains only the declared union. +* an optional one-way boolean parameter. + +A request parameter list is processed equivalently to an anonymous record. Since record field lists may vary between reader and writer, request parameters may also differ between the caller and responder, and such differences are resolved in the same manner as record field differences. + +The one-way parameter may only be true when the response type is `"null"` and no errors are listed. + +### Sample Protocol +For example, one may define a simple HelloWorld protocol with: +```json +{ + "namespace": "com.acme", + "protocol": "HelloWorld", + "doc": "Protocol Greetings", + + "types": [ + {"name": "Greeting", "type": "record", "fields": [ + {"name": "message", "type": "string"}]}, + {"name": "Curse", "type": "error", "fields": [ + {"name": "message", "type": "string"}]} + ], + + "messages": { + "hello": { + "doc": "Say hello.", + "request": [{"name": "greeting", "type": "Greeting" }], + "response": "Greeting", + "errors": ["Curse"] + } + } +} +``` + +## Protocol Wire Format + +### Message Transport +Messages may be transmitted via different transport mechanisms. + +To the transport, a _message_ is an opaque byte sequence. + +A transport is a system that supports: + +* **transmission of request messages** +* **receipt of corresponding response messages** +Servers may send a response message back to the client corresponding to a request message. The mechanism of correspondence is transport-specific. For example, in HTTP it is implicit, since HTTP directly supports requests and responses. But a transport that multiplexes many client threads over a single socket would need to tag messages with unique identifiers. + +Transports may be either stateless or stateful. In a stateless transport, messaging assumes no established connection state, while stateful transports establish connections that may be used for multiple messages. This distinction is discussed further in the [handshake](#handshake) section below. + +#### HTTP as Transport +When [HTTP](https://www.w3.org/Protocols/rfc2616/rfc2616.html) is used as a transport, each Avro message exchange is an HTTP request/response pair. All messages of an Avro protocol should share a single URL at an HTTP server. Other protocols may also use that URL. Both normal and error Avro response messages should use the 200 (OK) response code. The chunked encoding may be used for requests and responses, but, regardless the Avro request and response are the entire content of an HTTP request and response. The HTTP Content-Type of requests and responses should be specified as "avro/binary". Requests should be made using the POST method. + +HTTP is used by Avro as a stateless transport. + +### Message Framing +Avro messages are _framed_ as a list of buffers. + +Framing is a layer between messages and the transport. It exists to optimize certain operations. + +The format of framed message data is: + +* a series of buffers, where each buffer consists of: + * a four-byte, big-endian _buffer length_, followed by + * that many bytes of _buffer_ data. +* a message is always terminated by a zero-length buffer. + +Framing is transparent to request and response message formats (described below). Any message may be presented as a single or multiple buffers. + +Framing can permit readers to more efficiently get different buffers from different sources and for writers to more efficiently store different buffers to different destinations. In particular, it can reduce the number of times large binary objects are copied. For example, if an RPC parameter consists of a megabyte of file data, that data can be copied directly to a socket from a file descriptor, and, on the other end, it could be written directly to a file descriptor, never entering user space. + +A simple, recommended, framing policy is for writers to create a new segment whenever a single binary object is written that is larger than a normal output buffer. Small objects are then appended in buffers, while larger objects are written as their own buffers. When a reader then tries to read a large object the runtime can hand it an entire buffer directly, without having to copy it. + +### Handshake +The purpose of the handshake is to ensure that the client and the server have each other's protocol definition, so that the client can correctly deserialize responses, and the server can correctly deserialize requests. Both clients and servers should maintain a cache of recently seen protocols, so that, in most cases, a handshake will be completed without extra round-trip network exchanges or the transmission of full protocol text. + +RPC requests and responses may not be processed until a handshake has been completed. With a stateless transport, all requests and responses are prefixed by handshakes. With a stateful transport, handshakes are only attached to requests and responses until a successful handshake response has been returned over a connection. After this, request and response payloads are sent without handshakes for the lifetime of that connection. + +The handshake process uses the following record schemas: +```json +{ + "type": "record", + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +{ + "type": "record", + "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", + "type": {"type": "enum", "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", + "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +``` + +* A client first prefixes each request with a `HandshakeRequest` containing just the hash of its protocol and of the server's protocol (`clientHash!=null, clientProtocol=null, serverHash!=null`), where the hashes are 128-bit MD5 hashes of the JSON protocol text. If a client has never connected to a given server, it sends its hash as a guess of the server's hash, otherwise it sends the hash that it previously obtained from this server. +The server responds with a HandshakeResponse containing one of: + * `match=BOTH, serverProtocol=null, serverHash=null` if the client sent the valid hash of the server's protocol and the server knows what protocol corresponds to the client's hash. In this case, the request is complete and the response data immediately follows the HandshakeResponse. + * `match=CLIENT, serverProtocol!=null, serverHash!=null` if the server has previously seen the client's protocol, but the client sent an incorrect hash of the server's protocol. The request is complete and the response data immediately follows the HandshakeResponse. The client must use the returned protocol to process the response and should also cache that protocol and its hash for future interactions with this server. + * `match=NONE` if the server has not previously seen the client's protocol. The serverHash and serverProtocol may also be non-null if the server's protocol hash was incorrect. +In this case the client must then re-submit its request with its protocol text (`clientHash!=null, clientProtocol!=null, serverHash!=null`) and the server should respond with a successful match (match=BOTH, serverProtocol=null, serverHash=null) as above. + +The meta field is reserved for future handshake enhancements. + +### Call Format +A _call_ consists of a request message paired with its resulting response or error message. Requests and responses contain extensible metadata, and both kinds of messages are framed as described above. + +The format of a call request is: + +* _request metadata_, a map with values of type bytes +* the _message name_, an Avro string, followed by +* the _message parameters_. Parameters are serialized according to the message's request declaration. +When the empty string is used as a message name a server should ignore the parameters and return an empty response. A client may use this to ping a server or to perform a handshake without sending a protocol message. + +When a message is declared one-way and a stateful connection has been established by a successful handshake response, no response data is sent. Otherwise the format of the call response is: + +* _response metadata_, a map with values of type bytes +* a one-byte error _flag_ boolean, followed by either: + * if the error flag is false, the message _response_, serialized per the message's response schema. + * if the error flag is true, the _error_, serialized per the message's effective error union schema. + +### Schema Resolution {#schema-resolution} +A reader of Avro data, whether from an RPC or a file, can always parse that data because the original schema must be provided along with the data. However, the reader may be programmed to read data into a different schema. For example, if the data was written with a different version of the software than it is read, then fields may have been added or removed from records. This section specifies how such schema differences should be resolved. + +We refer to the schema used to write the data as the writer's schema, and the schema that the application expects the reader's schema. Differences between these should be resolved as follows: + +* It is an error if the two schemas do not _match_. +To match, one of the following must hold: + * both schemas are arrays whose item types match + * both schemas are maps whose value types match + * both schemas are enums whose (unqualified) names match + * both schemas are fixed whose sizes and (unqualified) names match + * both schemas are records with the same (unqualified) name + * either schema is a union + * both schemas have same primitive type + * the writer's schema may be promoted to the reader's as follows: + * int is promotable to long, float, or double + * long is promotable to float or double + * float is promotable to double + * string is promotable to bytes + * bytes is promotable to string +* **if both are records**: + * the ordering of fields may be different: fields are matched by name. + * schemas for fields with the same name in both records are resolved recursively. + * if the writer's record contains a field with a name not present in the reader's record, the writer's value for that field is ignored. + * if the reader's record schema has a field that contains a default value, and writer's schema does not have a field with the same name, then the reader should use the default value from its field. + * if the reader's record schema has a field with no default value, and writer's schema does not have a field with the same name, an error is signalled. +* **if both are enums**: +if the writer's symbol is not present in the reader's enum and the reader has a default value, then that value is used, otherwise an error is signalled. + +* **if both are arrays**: +This resolution algorithm is applied recursively to the reader's and writer's array item schemas. + +* **if both are maps**: +This resolution algorithm is applied recursively to the reader's and writer's value schemas. + +* **if both are unions**: +The first schema in the reader's union that matches the selected writer's union schema is recursively resolved against it. if none match, an error is signalled. + +* **if reader's is a union, but writer's is not** +The first schema in the reader's union that matches the writer's schema is recursively resolved against it. If none match, an error is signalled. + +* **if writer's is a union, but reader's is not** +If the reader's schema matches the selected writer's schema, it is recursively resolved against it. If they do not match, an error is signalled. + +A schema's _doc_ fields are ignored for the purposes of schema resolution. Hence, the _doc_ portion of a schema may be dropped at serialization. + +### Parsing Canonical Form for Schemas {#parsing-canonical-form-for-schemas} +One of the defining characteristics of Avro is that a reader must use the schema used by the writer of the data in order to know how to read the data. This assumption results in a data format that's compact and also amenable to many forms of schema evolution. However, the specification so far has not defined what it means for the reader to have the "same" schema as the writer. Does the schema need to be textually identical? Well, clearly adding or removing some whitespace to a JSON expression does not change its meaning. At the same time, reordering the fields of records clearly does change the meaning. So what does it mean for a reader to have "the same" schema as a writer? + +Parsing Canonical Form is a transformation of a writer's schema that let's us define what it means for two schemas to be "the same" for the purpose of reading data written against the schema. It is called Parsing Canonical Form because the transformations strip away parts of the schema, like "doc" attributes, that are irrelevant to readers trying to parse incoming data. It is called Canonical Form because the transformations normalize the JSON text (such as the order of attributes) in a way that eliminates unimportant differences between schemas. If the Parsing Canonical Forms of two different schemas are textually equal, then those schemas are "the same" as far as any reader is concerned, i.e., there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writing using the other original schema. (We sketch a proof of this property in a companion document.) + +The next subsection specifies the transformations that define Parsing Canonical Form. But with a well-defined canonical form, it can be convenient to go one step further, transforming these canonical forms into simple integers ("fingerprints") that can be used to uniquely identify schemas. The subsection after next recommends some standard practices for generating such fingerprints. + +#### Transforming into Parsing Canonical Form +Assuming an input schema (in JSON form) that's already UTF-8 text for a _valid_ Avro schema (including all quotes as required by JSON), the following transformations will produce its Parsing Canonical Form: + +* [PRIMITIVES] Convert primitive schemas to their simple form (e.g., int instead of `{"type":"int"}`). +* [FULLNAMES] Replace short names with fullnames, using applicable namespaces to do so. Then eliminate namespace attributes, which are now redundant. +* [STRIP] Keep only attributes that are relevant to parsing data, which are: _type_, _name_, _fields_, _symbols_, _items_, _values_, _size_. Strip all others (e.g., _doc_ and _aliases_). +* [ORDER] Order the appearance of fields of JSON objects as follows: _name_, _type_, _fields_, _symbols_, _items_, _values_, _size_. For example, if an object has _type_, _name_, and _size_ fields, then the _name_ field should appear first, followed by the _type_ and then the _size_ fields. +* [STRINGS] For all JSON string literals in the schema text, replace any escaped characters (e.g., \uXXXX escapes) with their UTF-8 equivalents. +* [INTEGERS] Eliminate quotes around and any leading zeros in front of JSON integer literals (which appear in the _size_ attributes of _fixed_ schemas). +* [WHITESPACE] Eliminate all whitespace in JSON outside of string literals. + +#### Schema Fingerprints {#schema-fingerprints} +"[A] fingerprinting algorithm is a procedure that maps an arbitrarily large data item (such as a computer file) to a much shorter bit string, its fingerprint, that uniquely identifies the original data for all practical purposes" (quoted from [Wikipedia](https://en.wikipedia.org/wiki/Fingerprint_(computing))). In the Avro context, fingerprints of Parsing Canonical Form can be useful in a number of applications; for example, to cache encoder and decoder objects, to tag data items with a short substitute for the writer's full schema, and to quickly negotiate common-case schemas between readers and writers. + +In designing fingerprinting algorithms, there is a fundamental trade-off between the length of the fingerprint and the probability of collisions. To help application designers find appropriate points within this trade-off space, while encouraging interoperability and ease of implementation, we recommend using one of the following three algorithms when fingerprinting Avro schemas: + +* When applications can tolerate longer fingerprints, we recommend using the [SHA-256 digest algorithm](https://en.wikipedia.org/wiki/SHA-2) to generate 256-bit fingerprints of Parsing Canonical Forms. Most languages today have SHA-256 implementations in their libraries. +* At the opposite extreme, the smallest fingerprint we recommend is a 64-bit [Rabin fingerprint](https://en.wikipedia.org/wiki/Rabin_fingerprint). Below, we provide pseudo-code for this algorithm that can be easily translated into any programming language. 64-bit fingerprints should guarantee uniqueness for schema caches of up to a million entries (for such a cache, the chance of a collision is 3E-8). We don't recommend shorter fingerprints, as the chances of collisions is too great (for example, with 32-bit fingerprints, a cache with as few as 100,000 schemas has a 50% chance of having a collision). +* Between these two extremes, we recommend using the [MD5 message digest](https://en.wikipedia.org/wiki/MD5) to generate 128-bit fingerprints. These make sense only where very large numbers of schemas are being manipulated (tens of millions); otherwise, 64-bit fingerprints should be sufficient. As with SHA-256, MD5 implementations are found in most libraries today. + +These fingerprints are not meant to provide any security guarantees, even the longer SHA-256-based ones. Most Avro applications should be surrounded by security measures that prevent attackers from writing random data and otherwise interfering with the consumers of schemas. We recommend that these surrounding mechanisms be used to prevent collision and pre-image attacks (i.e., "forgery") on schema fingerprints, rather than relying on the security properties of the fingerprints themselves. + +Rabin fingerprints are [cyclic redundancy checks](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) computed using irreducible polynomials. In the style of the Appendix of [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt) (pg 10), which defines the CRC-32 algorithm, here's our definition of the 64-bit AVRO fingerprinting algorithm: +```java +long fingerprint64(byte[] buf) { + if (FP_TABLE == null) initFPTable(); + long fp = EMPTY; + for (int i = 0; i < buf.length; i++) + fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff]; + return fp; +} + +static long EMPTY = 0xc15d213aa4d7a795L; +static long[] FP_TABLE = null; + +void initFPTable() { + FP_TABLE = new long[256]; + for (int i = 0; i < 256; i++) { + long fp = i; + for (int j = 0; j < 8; j++) + fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L)); + FP_TABLE[i] = fp; + } +} +``` + +Readers interested in the mathematics behind this algorithm may want to read [Chapter 14 of the Second Edition of Hacker's Delight](https://books.google.com/books?id=XD9iAwAAQBAJ&pg=PA319). (Unlike RFC-1952 and the book chapter, we prepend a single one bit to messages. We do this because CRCs ignore leading zero bits, which can be problematic. Our code prepends a one-bit by initializing fingerprints using EMPTY, rather than initializing using zero as in RFC-1952 and the book chapter.) + +## Logical Types +A logical type is an Avro primitive or complex type with extra attributes to represent a derived type. The attribute `logicalType` must always be present for a logical type, and is a string with the name of one of the logical types listed later in this section. Other attributes may be defined for particular logical types. + +A logical type is always serialized using its underlying Avro type so that values are encoded in exactly the same way as the equivalent Avro type that does not have a `logicalType` attribute. Language implementations may choose to represent logical types with an appropriate native type, although this is not required. + +Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type. + +### Decimal +The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled × 10-scale_. + +A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute. + +The following attributes are supported: + +* _scale_, a JSON integer representing the scale (optional). If not specified the scale is 0. +* _precision_, a JSON integer representing the (maximum) precision of decimals stored in this type (required). +For example, the following schema represents decimal numbers with a maximum precision of 4 and a scale of 2: +```json +{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 +} +``` +Precision must be a positive integer greater than zero. If the underlying type is a _fixed_, then the precision is limited by its size. An array of length n can store at most _floor(log10(28 × n - 1 - 1))_ base-10 digits of precision. + +Scale must be zero or a positive integer less than or equal to the precision. + +For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match. + +### UUID +The `uuid` logical type represents a random generated universally unique identifier (UUID). + +A `uuid` logical type annotates an Avro `string`. The string has to conform with [RFC-4122](https://www.ietf.org/rfc/rfc4122.txt) + +### Date +The `date` logical type represents a date within the calendar, with no reference to a particular time zone or time of day. + +A `date` logical type annotates an Avro `int`, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar). + +The following schema represents a date: +```json +{ + "type": "int", + "logicalType": "date" +} +``` + +### Time (millisecond precision) +The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. + +A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000. + +### Time (microsecond precision) +The `time-micros` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond. + +A `time-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds after midnight, 00:00:00.000000. + +### Timestamp (millisecond precision) +The `timestamp-millis` logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond. Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +A `timestamp-millis` logical type annotates an Avro `long`, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC. + +### Timestamp (microsecond precision) +The `timestamp-micros` logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one microsecond. Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +A `timestamp-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC. + +### Local timestamp (millisecond precision) +The `local-timestamp-millis` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one millisecond. + +A `local-timestamp-millis` logical type annotates an Avro `long`, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. + +### Local timestamp (microsecond precision) +The `local-timestamp-micros` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one microsecond. + +A `local-timestamp-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds, from 1 January 1970 00:00:00.000000. + +### Duration +The `duration` logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. + +A `duration` logical type annotates Avro `fixed` type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds. diff --git a/doc/content/en/docs/1.11.2/_index.md b/doc/content/en/docs/1.11.2/_index.md new file mode 100755 index 00000000000..e123e9a0640 --- /dev/null +++ b/doc/content/en/docs/1.11.2/_index.md @@ -0,0 +1,59 @@ +--- +title: "Apache Avroâ„ĸ 1.11.2 Documentation" +linkTitle: "1.11.2" +type: docs +weight: -1112 +--- + + + +## Introduction + +Apache Avroâ„ĸ is a data serialization system. + +Avro provides: + +* Rich data structures. +* A compact, fast, binary data format. +* A container file, to store persistent data. +* Remote procedure call (RPC). +* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation as an optional optimization, only worth implementing for statically typed languages. + +## Schemas + +Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing. + +When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present. + +When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since both client and server both have the other's full schema, correspondence between same named fields, missing fields, extra fields, etc. can all be easily resolved. + +Avro schemas are defined with JSON . This facilitates implementation in languages that already have JSON libraries. + +## Comparison with other systems + +Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects. + +* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. +* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. +* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names. + + diff --git a/doc/content/en/docs/1.11.2/api-c++.md b/doc/content/en/docs/1.11.2/api-c++.md new file mode 100644 index 00000000000..a333321ede4 --- /dev/null +++ b/doc/content/en/docs/1.11.2/api-c++.md @@ -0,0 +1,29 @@ +--- +title: "C++ API" +linkTitle: "C++ API" +weight: 102 +manualLink: /docs/1.11.2/api/cpp/html/ +--- + + + +The C++ API documentation can be found here. diff --git a/doc/content/en/docs/1.11.2/api-c.md b/doc/content/en/docs/1.11.2/api-c.md new file mode 100644 index 00000000000..1b7eeb2f48a --- /dev/null +++ b/doc/content/en/docs/1.11.2/api-c.md @@ -0,0 +1,29 @@ +--- +title: "C API" +linkTitle: "C API" +weight: 101 +manualLink: /docs/1.11.2/api/c/ +--- + + + +The C API documentation can be found here. diff --git a/doc/content/en/docs/1.11.2/api-csharp.md b/doc/content/en/docs/1.11.2/api-csharp.md new file mode 100644 index 00000000000..c7884822649 --- /dev/null +++ b/doc/content/en/docs/1.11.2/api-csharp.md @@ -0,0 +1,29 @@ +--- +title: "C# API" +linkTitle: "C# API" +weight: 103 +manualLink: /docs/1.11.2/api/csharp/html/ +--- + + + +The C# API documentation can be found here. diff --git a/doc/content/en/docs/1.11.2/api-java.md b/doc/content/en/docs/1.11.2/api-java.md new file mode 100644 index 00000000000..496967e7791 --- /dev/null +++ b/doc/content/en/docs/1.11.2/api-java.md @@ -0,0 +1,29 @@ +--- +title: "Java API" +linkTitle: "Java API" +weight: 100 +manualLink: /docs/1.11.2/api/java/ +--- + + + +The Javadocs can be found here. diff --git a/doc/content/en/docs/1.11.2/logo.svg b/doc/content/en/docs/1.11.2/logo.svg new file mode 100644 index 00000000000..b44ed197262 --- /dev/null +++ b/doc/content/en/docs/1.11.2/logo.svg @@ -0,0 +1,22 @@ + + + diff --git a/doc/content/en/docs/1.11.3/Getting started (Java)/_index.md b/doc/content/en/docs/1.11.3/Getting started (Java)/_index.md new file mode 100644 index 00000000000..60e3a827340 --- /dev/null +++ b/doc/content/en/docs/1.11.3/Getting started (Java)/_index.md @@ -0,0 +1,289 @@ +--- +categories: [] +tags: ["java"] +title: "Getting Started (Java)" +linkTitle: "Getting Started (Java)" +weight: 2 +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Java. This guide only covers using Avro for data serialization; see Patrick Hunt's [Avro RPC Quick Start](https://github.com/phunt/avro-rpc-quickstart) for a good introduction to using Avro for RPC. + +## Download + +Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the [Apache Avroâ„ĸ Download]({{< relref "/project/download" >}}) page. This guide uses Avro 1.11.3, the latest version at the time of writing. For the examples in this guide, download avro-1.11.3.jar and avro-tools-1.11.3.jar. + +Alternatively, if you are using Maven, add the following dependency to your POM: + +```xml + + org.apache.avro + avro + 1.11.3 + +``` + +As well as the Avro Maven plugin (for performing code generation): + +```xml + + org.apache.avro + avro-maven-plugin + 1.11.3 + + + generate-sources + + schema + + + ${project.basedir}/src/main/avro/ + ${project.basedir}/src/main/java/ + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + +``` + +You may also build the required Avro jars from source. Building Avro is beyond the scope of this guide; see the Build Documentation page in the wiki for more information. + +## Defining a schema + +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing with code generation + +### Compiling the schema +Code generation allows us to automatically create classes based on our previously-defined schema. Once we have defined the relevant classes, there is no need to use the schema directly in our programs. We use the avro-tools jar to generate code as follows: + +```shell +java -jar /path/to/avro-tools-1.11.3.jar compile schema +``` + +This will generate the appropriate source files in a package based on the schema's namespace in the provided destination folder. For instance, to generate a User class in package example.avro from the schema defined above, run + +```shell +java -jar /path/to/avro-tools-1.11.3.jar compile schema user.avsc . +``` + +Note that if you using the Avro Maven plugin, there is no need to manually invoke the schema compiler; the plugin automatically performs code generation on any .avsc files present in the configured source directory. + +### Creating Users +Now that we've completed the code generation, let's create some Users, serialize them to a data file on disk, and then read back the file and deserialize the User objects. + +First let's create some Users and set their fields. + +```java +User user1 = new User(); +user1.setName("Alyssa"); +user1.setFavoriteNumber(256); +// Leave favorite color null + +// Alternate constructor +User user2 = new User("Ben", 7, "red"); + +// Construct via builder +User user3 = User.newBuilder() + .setName("Charlie") + .setFavoriteColor("blue") + .setFavoriteNumber(null) + .build(); +``` + +As shown in this example, Avro objects can be created either by invoking a constructor directly or by using a builder. Unlike constructors, builders will automatically set any default values specified in the schema. Additionally, builders validate the data as it set, whereas objects constructed directly will not cause an error until the object is serialized. However, using constructors directly generally offers better performance, as builders create a copy of the datastructure before it is written. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. Similarly, we set user3's favorite number to null (using a builder requires setting all fields, even if they are null). + +### Serializing +Now let's serialize our Users to disk. + +```java +// Serialize user1, user2 and user3 to disk +DatumWriter userDatumWriter = new SpecificDatumWriter(User.class); +DataFileWriter dataFileWriter = new DataFileWriter(userDatumWriter); +dataFileWriter.create(user1.getSchema(), new File("users.avro")); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.append(user3); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. The SpecificDatumWriter class is used with generated classes and extracts the schema from the specified generated type. + +Next we create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, let's deserialize the data file we just created. + +```java +// Deserialize Users from disk +DatumReader userDatumReader = new SpecificDatumReader(User.class); +DataFileReader dataFileReader = new DataFileReader(file, userDatumReader); +User user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +} +``` + +This snippet will output: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"} +``` + +Deserializing is very similar to serializing. We create a SpecificDatumReader, analogous to the SpecificDatumWriter we used in serialization, which converts in-memory serialized items into instances of our generated class, in this case User. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file and the schema provided by the reader, in this case the User class. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next we use the DataFileReader to iterate through the serialized Users and print the deserialized object to stdout. Note how we perform the iteration: we create a single User object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same User object rather than allocating a new User for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (User user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile # includes code generation via Avro Maven plugin +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain +``` + +### Beta feature: Generating faster code +In release 1.9.0, we introduced a new approach to generating code that speeds up decoding of objects by more than 10% and encoding by more than 30% (future performance enhancements are underway). To ensure a smooth introduction of this change into production systems, this feature is controlled by a feature flag, the system property org.apache.avro.specific.use_custom_coders. In this first release, this feature is off by default. To turn it on, set the system flag to true at runtime. In the sample above, for example, you could enable the fater coders as follows: + +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \ + -Dorg.apache.avro.specific.use_custom_coders=true + +Note that you do not have to recompile your Avro schema to have access to this feature. The feature is compiled and built into your code, and you turn it on and off at runtime using the feature flag. As a result, you can turn it on during testing, for example, and then off in production. Or you can turn it on in production, and quickly turn it off if something breaks. + +We encourage the Avro community to exercise this new feature early to help build confidence. (For those paying one-demand for compute resources in the cloud, it can lead to meaningful cost savings.) As confidence builds, we will turn this feature on by default, and eventually eliminate the feature flag (and the old code). + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. + +Let's go over the same example as in the previous section, but without using code generation: we'll create some users, serialize them to a data file on disk, and then read back the file and deserialize the users objects. + +### Creating users +First, we use a Parser to read our schema definition and create a Schema object. + +```java +Schema schema = new Schema.Parser().parse(new File("user.avsc")); +``` + +Using this schema, let's create some users. + +```java +GenericRecord user1 = new GenericData.Record(schema); +user1.put("name", "Alyssa"); +user1.put("favorite_number", 256); +// Leave favorite color null + +GenericRecord user2 = new GenericData.Record(schema); +user2.put("name", "Ben"); +user2.put("favorite_number", 7); +user2.put("favorite_color", "red"); +``` + +Since we're not using code generation, we use GenericRecords to represent users. GenericRecord uses the schema to verify that we only specify valid fields. If we try to set a non-existent field (e.g., user1.put("favorite_animal", "cat")), we'll get an AvroRuntimeException when we run the program. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. + +### Serializing +Now that we've created our user objects, serializing and deserializing them is almost identical to the example above which uses code generation. The main difference is that we use generic instead of specific readers and writers. + +First we'll serialize our users to a data file on disk. + +```java +// Serialize user1 and user2 to disk +File file = new File("users.avro"); +DatumWriter datumWriter = new GenericDatumWriter(schema); +DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); +dataFileWriter.create(schema, file); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. Since we are not using code generation, we create a GenericDatumWriter. It requires the schema both to determine how to write the GenericRecords and to verify that all non-nullable fields are present. + +As in the code generation example, we also create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, we'll deserialize the data file we just created. + +```java +// Deserialize users from disk +DatumReader datumReader = new GenericDatumReader(schema); +DataFileReader dataFileReader = new DataFileReader(file, datumReader); +GenericRecord user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +``` + +This outputs: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +``` + +Deserializing is very similar to serializing. We create a GenericDatumReader, analogous to the GenericDatumWriter we used in serialization, which converts in-memory serialized items into GenericRecords. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file, and the reader's schema provided to the GenericDatumReader. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next, we use the DataFileReader to iterate through the serialized users and print the deserialized object to stdout. Note how we perform the iteration: we create a single GenericRecord object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same record object rather than allocating a new GenericRecord for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (GenericRecord user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile +$ mvn -q exec:java -Dexec.mainClass=example.GenericMain +``` diff --git a/doc/content/en/docs/1.11.3/Getting started (Python)/_index.md b/doc/content/en/docs/1.11.3/Getting started (Python)/_index.md new file mode 100644 index 00000000000..8675b399fcd --- /dev/null +++ b/doc/content/en/docs/1.11.3/Getting started (Python)/_index.md @@ -0,0 +1,147 @@ +--- +categories: [] +tags: ["python"] +title: "Getting Started (Python)" +linkTitle: "Getting Started (Python)" +weight: 3 +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Python. This guide only covers using Avro for data serialization; see Patrick Hunt's Avro RPC Quick Start for a good introduction to using Avro for RPC. + +## Notice for Python 3 users +A package called "avro-python3" had been provided to support Python 3 previously, but the codebase was consolidated into the "avro" package and that supports both Python 2 and 3 now. The avro-python3 package will be removed in the near future, so users should use the "avro" package instead. They are mostly API compatible, but there's a few minor difference (e.g., function name capitalization, such as avro.schema.Parse vs avro.schema.parse). + +## Download +For Python, the easiest way to get started is to install it from PyPI. Python's Avro API is available over PyPi. + +```shell +$ python3 -m pip install avro +``` + +The official releases of the Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the Apache Avroâ„ĸ Releases page. This guide uses Avro 1.11.3, the latest version at the time of writing. Download and unzip avro-1.11.3.tar.gz, and install via python setup.py (this will probably require root privileges). Ensure that you can import avro from a Python prompt. + +```shell +$ tar xvf avro-1.11.3.tar.gz +$ cd avro-1.11.3 +$ python setup.py install +$ python +>>> import avro # should not raise ImportError +``` + +Alternatively, you may build the Avro Python library from source. From your the root Avro directory, run the commands + +```shell +$ cd lang/py/ +$ python3 -m pip install -e . +$ python +``` + +## Defining a schema +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item, regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. Note that the Avro Python library does not support code generation. + +Try running the following code snippet, which serializes two users to a data file on disk, and then reads back and deserializes the data file: + +```python +import avro.schema +from avro.datafile import DataFileReader, DataFileWriter +from avro.io import DatumReader, DatumWriter + +schema = avro.schema.parse(open("user.avsc", "rb").read()) + +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +writer.close() + +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +for user in reader: + print(user) +reader.close() +``` + +This outputs: + +```json +{u'favorite_color': None, u'favorite_number': 256, u'name': u'Alyssa'} +{u'favorite_color': u'red', u'favorite_number': 7, u'name': u'Ben'} +``` + +Do make sure that you open your files in binary mode (i.e. using the modes wb or rb respectively). Otherwise you might generate corrupt files due to automatic replacement of newline characters with the platform-specific representations. + +Let's take a closer look at what's going on here. + +```python +schema = avro.schema.parse(open("user.avsc", "rb").read()) +``` + +avro.schema.parse takes a string containing a JSON schema definition as input and outputs a avro.schema.Schema object (specifically a subclass of Schema, in this case RecordSchema). We're passing in the contents of our user.avsc schema file here. + +```python +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +``` + +We create a DataFileWriter, which we'll use to write serialized items to a data file on disk. The DataFileWriter constructor takes three arguments: + +* The file we'll serialize to +* A DatumWriter, which is responsible for actually serializing the items to Avro's binary format (DatumWriters can be used separately from DataFileWriters, e.g., to perform IPC with Avro). +* The schema we're using. The DataFileWriter needs the schema both to write the schema to the data file, and to verify that the items we write are valid items and write the appropriate fields. + +```python +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +``` + +We use DataFileWriter.append to add items to our data file. Avro records are represented as Python dicts. Since the field favorite_color has type ["string", "null"], we are not required to specify this field, as shown in the first append. Were we to omit the required name field, an exception would be raised. Any extra entries not corresponding to a field are present in the dict are ignored. + +```python +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +``` + +We open the file again, this time for reading back from disk. We use a DataFileReader and DatumReader analagous to the DataFileWriter and DatumWriter above. + +```python +for user in reader: + print(user) +``` + +The DataFileReader is an iterator that returns dicts corresponding to the serialized items. diff --git a/doc/content/en/docs/1.11.3/IDL Language/_index.md b/doc/content/en/docs/1.11.3/IDL Language/_index.md new file mode 100644 index 00000000000..f50b0a489be --- /dev/null +++ b/doc/content/en/docs/1.11.3/IDL Language/_index.md @@ -0,0 +1,435 @@ +--- +title: "IDL Language" +linkTitle: "IDL Language" +weight: 201 +--- + + + +## Introduction +This document defines Avro IDL, a higher-level language for authoring Avro schemata. Before reading this document, you should have familiarity with the concepts of schemata and protocols, as well as the various primitive and complex types available in Avro. + +## Overview + +### Purpose +The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python. Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. + +### Usage +Each Avro IDL file defines a single Avro Protocol, and thus generates as its output a JSON-format Avro Protocol file with extension .avpr. + +To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr +$ head /tmp/namespaces.avpr +{ + "protocol" : "TestNamespace", + "namespace" : "avro.test.protocol", +``` +The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information. + +A Maven plugin is also provided to compile .avdl files. To use it, add something like the following to your pom.xml: +```xml + + + + org.apache.avro + avro-maven-plugin + + + + idl-protocol + + + + + + +``` + +## Defining a Protocol in Avro IDL +An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined by the following code: +```java +protocol MyProtocol { +} +``` +This is equivalent to (and generates) the following JSON protocol definition: +```json +{ +"protocol" : "MyProtocol", + "types" : [ ], + "messages" : { + } +} +``` +The namespace of the protocol may be changed using the @namespace annotation: +```java +@namespace("mynamespace") +protocol MyProtocol { +} +``` +This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, as will be described later in this document. + +Protocols in Avro IDL can contain the following items: + +* Imports of external protocol and schema files. +* Definitions of named schemata, including records, errors, enums, and fixeds. +* Definitions of RPC messages + +## Imports +Files may be imported in one of three formats: + +* An IDL file may be imported with a statement like: + + `import idl "foo.avdl";` + +* A JSON protocol file may be imported with a statement like: + + `import protocol "foo.avpr";` + +* A JSON schema file may be imported with a statement like: + + `import schema "foo.avsc";` + +Messages and types in the imported file are added to this file's protocol. + +Imported file names are resolved relative to the current IDL file. + +## Defining an Enumeration +Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. This default is only used when an incompatible symbol is read. It is not used if the enum field is missing. + +Example Writer Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE, OVAL +} +``` +Example Reader Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE +} = CIRCLE; +``` +In the above example, the reader will use the default value of `CIRCLE` whenever reading data written with the `OVAL` symbol of the writer. Also note that, unlike the JSON format, anonymous enums cannot be defined. + +## Defining a Fixed Length Field +Fixed fields are defined using the following syntax: +``` +fixed MD5(16); +``` +This example defines a fixed-length type called MD5 which contains 16 bytes. + +## Defining Records and Errors +Records are defined in Avro IDL using a syntax similar to a struct definition in C: +```java +record Employee { + string name; + boolean active = true; + long salary; +} +``` +The above example defines a record with the name “Employee” with three fields. + +To define an error, simply use the keyword _error_ instead of _record_. For example: +```java +error Kaboom { + string explanation; + int result_code = -1; +} +``` +Each field in a record or error consists of a type and a name, optional property annotations and an optional default value. + +A type reference in Avro IDL must be one of: + +* A primitive type +* A logical type +* A named schema defined prior to this usage in the same Protocol +* A complex type (array, map, or union) + +### Primitive Types +The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_. + +### Logical Types +Some of the logical types supported by Avro's JSON format are also supported by Avro IDL. The currently supported types are: + +* _decimal_ (logical type [decimal]({{< relref "../specification#decimal" >}})) +* _date_ (logical type [date]({{< relref "../specification#date" >}})) +* _time_ms_ (logical type [time-millis]({{< relref "../specification#time-millisecond-precision" >}})) +* _timestamp_ms_ (logical type [timestamp-millis]({{< relref "../specification#timestamp-millisecond-precision" >}})) +* _uuid_ (logical type [uuid]({{< relref "../specification#uuid" >}})) + +For example: +```java +record Job { + string jobid; + date submitDate; + time_ms submitTime; + timestamp_ms finishTime; + decimal(9,2) finishRatio; + uuid pk = "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"; +} +``` + +Logical types can also be specified via an annotation, which is useful for logical types for which a keyword does not exist: + +```java +record Job { + string jobid; + @logicalType("timestamp-micros") + long finishTime; +} +``` + +### References to Named Schemata +If a named schema has already been defined in the same Avro IDL file, it may be referenced by name as if it were a primitive type: +```java +record Card { + Suit suit; // refers to the enum Card defined above + int number; +} +``` + +### Default Values +Default values for fields may be optionally specified by using an equals sign after the field name followed by a JSON expression indicating the default value. This JSON is interpreted as described in the [spec]({{< relref "../specification#schema-record" >}}). + +### Complex Types + +#### Arrays +Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of any type t is denoted `array`. For example, an array of strings is denoted `array`, and a multidimensional array of Foo records would be `array>`. + +#### Maps +Map types are written similarly to array types. An array that contains values of type t is written `map`. As in the JSON schema format, all maps contain `string`-type keys. + +#### Unions +Union types are denoted as `union { typeA, typeB, typeC, ... }`. For example, this record contains a string field that is optional (unioned with null), and a field containing either a precise or a imprecise number: +```java +record RecordWithUnion { + union { null, string } optionalString; + union { decimal(12, 6), float } number; +} +``` +Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a record may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. + +Because it occurs so often, there is a special shorthand to denote a union of `null` with another type. In the following snippet, the first three fields have identical types: + +```java +record RecordWithUnion { + union { null, string } optionalString1 = null; + string? optionalString2 = null; + string? optionalString3; // No default value + string? optionalString4 = "something"; +} +``` + +Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So in the example above, all fields are valid. + +## Defining RPC Messages +The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message add which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: +```java +int add(int foo, int bar = 0); +``` +Message arguments, like record fields, may specify default values. + +To define a message with no response, you may use the alias _void_, equivalent to the Avro _null_ type: +```java +void logMessage(string message); +``` +If you have previously defined an error type within the same protocol, you may declare that a message can throw this error using the syntax: +```java +void goKaboom() throws Kaboom; +``` +To define a one-way message, use the keyword `oneway` after the parameter list, for example: +```java +void fireAndForget(string message) oneway; +``` + +## Other Language Features + +### Comments +All Java-style comments are supported within a Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines. + +Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment. + +### Escaping Identifiers +Occasionally, one will need to use a reserved language keyword as an identifier. In order to do so, backticks (`) may be used to escape the identifier. For example, to define a message with the literal name error, you may write: +```java +void `error`(); +``` +This syntax is allowed anywhere an identifier is expected. + +### Annotations for Ordering and Namespaces +Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. + +For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows: +```java +record MyRecord { + string @order("ascending") myAscendingSortField; + string @order("descending") myDescendingField; + string @order("ignore") myIgnoredField; +} +``` +A field's type (with the exception of type references) may also be preceded by annotations, e.g.: +```java +record MyRecord { + @java-class("java.util.ArrayList") array myStrings; +} +``` +This can be used to support java classes that can be serialized/deserialized via their `toString`/`String constructor`, e.g.: +```java +record MyRecord { + @java-class("java.math.BigDecimal") string value; + @java-key-class("java.io.File") map fileStates; + array<@java-class("java.math.BigDecimal") string> weights; +} +``` +Similarly, a `@namespace` annotation may be used to modify the namespace when defining a named schema. For example: +```java +@namespace("org.apache.avro.firstNamespace") +protocol MyProto { + @namespace("org.apache.avro.someOtherNamespace") + record Foo {} + + record Bar {} +} +``` +will define a protocol in the _firstNamespace_ namespace. The record _Foo_ will be defined in _someOtherNamespace_ and _Bar_ will be defined in _firstNamespace_ as it inherits its default from its container. + +Type and field aliases are specified with the `@aliases` annotation as follows: +```java +@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"]) +record MyRecord { + string @aliases(["oldField", "ancientField"]) myNewField; +} +``` +Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. + +## Complete Example +The following is an example of an Avro IDL file that shows most of the above features: +```java +/* +* Header with license information. +*/ + +/** + * An example protocol in Avro IDL + */ +@namespace("org.apache.avro.test") +protocol Simple { + /** Documentation for the enum type Kind */ + @aliases(["org.foo.KindOf"]) + enum Kind { + FOO, + BAR, // the bar enum value + BAZ + } = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. + + /** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ + fixed MD5(16); + + record TestRecord { + /** Record name; has no intrinsic order */ + string @order("ignore") name; + + Kind @order("descending") kind; + + MD5 hash; + + /* + Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. + */ + union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + + array arrayOfLongs; + } + + /** Errors are records that can be thrown from a method */ + error TestError { + string message; + } + + string hello(string greeting); + /** Return what was given. Demonstrates the use of backticks to name types/fields/messages/parameters after keywords */ + TestRecord echo(TestRecord `record`); + int add(int arg1, int arg2); + bytes echoBytes(bytes data); + void `error`() throws TestError; + // The oneway keyword forces the method to return null. + void ping() oneway; +} +``` +Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory. + +## IDE support + +There are several editors and IDEs that support Avro IDL files, usually via plugins. + +### JetBrains + +Apache Avro IDL Schema Support 203.1.2 was released in 9 December 2021. + +Features: +* Syntax Highlighting +* Code Completion +* Code Formatting +* Error Highlighting +* Inspections & quick fixes +* JSON schemas for .avpr and .avsc files + +It's available via the [JetBrains Marketplace](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support) +and on [GitHub](https://github.com/opwvhk/avro-schema-support). + +The plugin supports almost the all JetBrains products: IntelliJ IDEA, PyCharm, WebStorm, Android Studio, AppCode, GoLand, Rider, CLion, RubyMine, PhpStorm, DataGrip, DataSpell, MPS, Code With Me Guest and JetBrains Client. + +Only JetBrains Gateway does not support this plugin directly. But the backend (JetBrains) IDE that it connects to does. + +### Eclipse + +Avroclipse 0.0.11 was released on 4 December 2019. + +Features: +* Syntax Highlighting +* Error Highlighting +* Code Completion + +It is available on the [Eclipse Marketplace](https://marketplace.eclipse.org/content/avroclipse) +and [GitHub](https://github.com/dvdkruk/avroclipse). + +### Visual Studio Code + +avro-idl 0.5.0 was released on 16 June 2021. It provides syntax highlighting. + +It is available on the [VisualStudio Marketplace](https://marketplace.visualstudio.com/items?itemName=streetsidesoftware.avro) +and [GitHub](https://github.com/Jason3S/vscode-avro-ext) + +### Atom.io + +atom-language-avro 0.0.13 was released on 14 August 2015. It provides syntax highlighting. + +It is available as [Atom.io package](https://atom.io/packages/atom-language-avro) +and [GitHub](https://github.com/jonesetc/atom-language-avro) + +### Vim + +A `.avdl` detecting plugin by Gurpreet Atwal on [GitHub](https://github.com/gurpreetatwal/vim-avro) (Last change in December 2016) + +[avro-idl.vim](https://github.com/apache/avro/blob/master/share/editors/avro-idl.vim) in the Avro repository `share/editors` directory (last change in September 2010) + +Both provide syntax highlighting. diff --git a/doc/content/en/docs/1.11.3/MapReduce guide/_index.md b/doc/content/en/docs/1.11.3/MapReduce guide/_index.md new file mode 100644 index 00000000000..0d2df4db1e8 --- /dev/null +++ b/doc/content/en/docs/1.11.3/MapReduce guide/_index.md @@ -0,0 +1,396 @@ +--- +title: "MapReduce guide" +linkTitle: "MapReduce guide" +weight: 200 +--- + + + +Avro provides a convenient way to represent complex data structures within a Hadoop MapReduce job. Avro data can be used as both input to and output from a MapReduce job, as well as the intermediate format. The example in this guide uses Avro data for all three, but it's possible to mix and match; for instance, MapReduce can be used to aggregate a particular field in an Avro record. + +This guide assumes basic familiarity with both Hadoop MapReduce and Avro. See the [Hadoop documentation](https://hadoop.apache.org/docs/current/) and the [Avro getting started guide](./getting-started-java/) for introductions to these projects. This guide uses the old MapReduce API (`org.apache.hadoop.mapred`) and the new MapReduce API (`org.apache.hadoop.mapreduce`). + +## Setup +The code from this guide is included in the Avro docs under examples/mr-example. The example is set up as a Maven project that includes the necessary Avro and MapReduce dependencies and the Avro Maven plugin for code generation, so no external jars are needed to run the example. In particular, the POM includes the following dependencies: +```xml + + org.apache.avro + avro + 1.11.3 + + + org.apache.avro + avro-mapred + 1.11.3 + + + org.apache.hadoop + hadoop-client + 3.1.2 + +``` +And the following plugin: +```xml + + org.apache.avro + avro-maven-plugin + 1.11.3 + + + generate-sources + + schema + + + ${project.basedir}/../ + ${project.basedir}/target/generated-sources/ + + + + +``` + +If you do not configure the *sourceDirectory* and *outputDirectory* properties, the defaults will be used. The *sourceDirectory* property defaults to *src/main/avro*. The *outputDirectory* property defaults to *target/generated-sources*. You can change the paths to match your project layout. + +Alternatively, Avro jars can be downloaded directly from the Apache Avroâ„ĸ Releases [page](https://avro.apache.org/releases.html). The relevant Avro jars for this guide are *avro-1.11.3.jar* and *avro-mapred-1.11.3.jar*, as well as *avro-tools-1.11.3.jar* for code generation and viewing Avro data files as JSON. In addition, you will need to install Hadoop in order to use MapReduce. + +## Example: ColorCount +Below is a simple example of a MapReduce that uses Avro. There is an example for both the old (org.apache.hadoop.mapred) and new (org.apache.hadoop.mapreduce) APIs under *examples/mr-example/src/main/java/example/*. _MapredColorCount_ is the example for the older mapred API while _MapReduceColorCount_ is the example for the newer mapreduce API. Both examples are below, but we will detail the mapred API in our subsequent examples. + +MapredColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.*; +import org.apache.avro.Schema.Type; +import org.apache.avro.mapred.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.*; + +import example.avro.User; + +public class MapredColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } + } + + public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapredColorCount "); + return -1; + } + + JobConf conf = new JobConf(getConf(), MapredColorCount.class); + conf.setJobName("colorcount"); + + FileInputFormat.setInputPaths(conf, new Path(args[0])); + FileOutputFormat.setOutputPath(conf, new Path(args[1])); + + AvroJob.setMapperClass(conf, ColorCountMapper.class); + AvroJob.setReducerClass(conf, ColorCountReducer.class); + + // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set + // relevant config options such as input/output format, map output + // classes, and output key class. + AvroJob.setInputSchema(conf, User.getClassSchema()); + AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); + + JobClient.runJob(conf); + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args); + System.exit(res); + } +} +``` + +MapReduceColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.avro.mapreduce.AvroJob; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.avro.mapreduce.AvroKeyValueOutputFormat; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import example.avro.User; + +public class MapReduceColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } + + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapReduceColorCount "); + return -1; + } + + Job job = new Job(getConf()); + job.setJarByClass(MapReduceColorCount.class); + job.setJobName("Color Count"); + + FileInputFormat.setInputPaths(job, new Path(args[0])); + FileOutputFormat.setOutputPath(job, new Path(args[1])); + + job.setInputFormatClass(AvroKeyInputFormat.class); + job.setMapperClass(ColorCountMapper.class); + AvroJob.setInputKeySchema(job, User.getClassSchema()); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + job.setOutputFormatClass(AvroKeyValueOutputFormat.class); + job.setReducerClass(ColorCountReducer.class); + AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); + AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); + + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new MapReduceColorCount(), args); + System.exit(res); + } +} +``` +ColorCount reads in data files containing *User* records, defined in _examples/user.avsc_, and counts the number of instances of each favorite color. (This example draws inspiration from the canonical _WordCount_ MapReduce application.) This example uses the old MapReduce API. See MapReduceAvroWordCount, found under _doc/examples/mr-example/src/main/java/example/_ to see the new MapReduce API example. The User schema is defined as follows: +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` +This schema is compiled into the *User* class used by *ColorCount* via the Avro Maven plugin (see _examples/mr-example/pom.xml_ for how this is set up). + +*ColorCountMapper* essentially takes a *User* as input and extracts the User's favorite color, emitting the key-value pair ``. _ColorCountReducer_ then adds up how many occurrences of a particular favorite color were emitted, and outputs the result as a Pair record. These Pairs are serialized to an Avro data file. + +## Running ColorCount +The _ColorCount_ application is provided as a Maven project in the Avro docs under _examples/mr-example_. To build the project, including the code generation of the User schema, run: +```shell +mvn compile +``` +Next, run _GenerateData_ from `examples/mr-examples` to create an Avro data file, `input/users.avro`, containing 20 Users with favorite colors chosen randomly from a list: +```shell +mvn exec:java -q -Dexec.mainClass=example.GenerateData +``` +Besides creating the data file, GenerateData prints the JSON representations of the Users generated to stdout, for example: +```json +{"name": "user", "favorite_number": null, "favorite_color": "red"} +{"name": "user", "favorite_number": null, "favorite_color": "green"} +{"name": "user", "favorite_number": null, "favorite_color": "purple"} +{"name": "user", "favorite_number": null, "favorite_color": null} +... +``` +Now we're ready to run ColorCount. We specify our freshly-generated input folder as the input path and output as our output folder (note that MapReduce will not start a job if the output folder already exists): +```shell +mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output" +``` +Once ColorCount completes, checking the contents of the new output directory should yield the following: +```shell +$ ls output/ +part-00000.avro _SUCCESS +``` +You can check the contents of the generated Avro file using the avro-tools jar: +```shell +$ java -jar /path/to/avro-tools-1.11.3.jar tojson output/part-00000.avro +{"value": 3, "key": "blue"} +{"value": 7, "key": "green"} +{"value": 1, "key": "none"} +{"value": 2, "key": "orange"} +{"value": 3, "key": "purple"} +{"value": 2, "key": "red"} +{"value": 2, "key": "yellow"} +``` +Now let's go over the ColorCount example in detail. + +## AvroMapper - org.apache.hadoop.mapred API + +The easiest way to use Avro data files as input to a MapReduce job is to subclass `AvroMapper`. An `AvroMapper` defines a `map` function that takes an Avro datum as input and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountMapper is an AvroMapper that takes a User as input and outputs a `Pair>`, where the CharSequence key is the user's favorite color and the Integer value is 1. +```java +public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } +} +``` +In order to use our AvroMapper, we must call AvroJob.setMapperClass and AvroJob.setInputSchema. +```java +AvroJob.setMapperClass(conf, ColorCountMapper.class); +AvroJob.setInputSchema(conf, User.getClassSchema()); +``` +Note that `AvroMapper` does not implement the `Mapper` interface. Under the hood, the specified Avro data files are deserialized into AvroWrappers containing the actual data, which are processed by a Mapper that calls the configured AvroMapper's map function. AvroJob.setInputSchema sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setMapperClass`, `JobConf.setInputFormat`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Mapper - org.apache.hadoop.mapreduce API +This document will not go into all the differences between the mapred and mapreduce APIs, however will describe the main differences. As you can see, ColorCountMapper is now a subclass of the Hadoop Mapper class and is passed an AvroKey as it's key. Additionally, the AvroJob method calls were slightly changed. +```java + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } +``` + +## AvroReducer - org.apache.hadoop.mapred API +Analogously to AvroMapper, an AvroReducer defines a reducer function that takes the key/value types output by an AvroMapper (or any mapper that outputs Pairs) and outputs a key/value pair represented a Pair record. In the ColorCount example, ColorCountReducer is an AvroReducer that takes the CharSequence key representing a favorite color and the `Iterable` representing the counts for that color (they should all be 1 in this example) and adds up the counts. +```java +public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } +} +``` +In order to use our AvroReducer, we must call AvroJob.setReducerClass and AvroJob.setOutputSchema. +```java +AvroJob.setReducerClass(conf, ColorCountReducer.class); +AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); +``` +Note that _AvroReducer_ does not implement the _Reducer_ interface. The intermediate Pairs output by the mapper are split into _AvroKeys_ and _AvroValues_, which are processed by a Reducer that calls the configured AvroReducer's `reduce` function. `AvroJob.setOutputSchema` sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setReducerClass`, `JobConf.setOutputFormat`, `JobConf.setOutputKeyClass`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Reduce - org.apache.hadoop.mapreduce API +As before we not detail every difference between the APIs. As with the _Mapper_ change _ColorCountReducer_ is now a subclass of _Reducer_ and _AvroKey_ and _AvroValue_ are emitted. Additionally, the _AvroJob_ method calls were slightly changed. +```java + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } +``` + +## Learning more +The mapred API allows users to mix Avro AvroMappers and AvroReducers with non-Avro Mappers and Reducers and the mapreduce API allows users input Avro and output non-Avro or vice versa. + +The mapred package has API org.apache.avro.mapred documentation as does the `org.apache.avro.mapreduce` package. MapReduce API (`org.apache.hadoop.mapreduce`). Similarily to the mapreduce package, it's possible with the mapred API to implement your own Mappers and Reducers directly using the public classes provided in these libraries. See the `AvroWordCount` application, found under _examples/mr-example/src/main/java/example/AvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the old MapReduce API. See the `MapReduceAvroWordCount` application, found under _examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the new MapReduce API. diff --git a/doc/content/en/docs/1.11.3/SASL profile/_index.md b/doc/content/en/docs/1.11.3/SASL profile/_index.md new file mode 100644 index 00000000000..67c316e221c --- /dev/null +++ b/doc/content/en/docs/1.11.3/SASL profile/_index.md @@ -0,0 +1,93 @@ +--- +title: "SASL profile" +linkTitle: "SASL profile" +weight: 202 +--- + + + +## Introduction +SASL ([RFC 2222](https://www.ietf.org/rfc/rfc2222.txt)) provides a framework for authentication and security of network protocols. Each protocol that uses SASL is meant to define a SASL profile. This document provides a SASL profile for connection-based Avro RPC. + +## Overview +SASL negotiation proceeds as a series of message interactions over a connection between a client and server using a selected SASL mechanism. The client starts this negotiation by sending its chosen mechanism name with an initial (possibly empty) message. Negotiation proceeds with the exchange of messages until either side indicates success or failure. The content of the messages is mechanism-specific. If the negotiation succeeds, then the session can proceed over the connection, otherwise it must be abandoned. + +Some mechanisms continue to process session data after negotiation (e.g., encrypting it), while some specify that further session data is transmitted unmodifed. + +## Negotiation + +### Commands +Avro SASL negotiation uses four one-byte commands. + +* 0: START Used in a client's initial message. +* 1: CONTINUE Used while negotiation is ongoing. +* 2: FAIL Terminates negotiation unsuccessfully. +* 3: COMPLETE Terminates negotiation sucessfully. + +The format of a START message is: + +`| 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |` + +The format of a CONTINUE message is: + +`| 1 | 4-byte payload length | payload data |` + +The format of a FAIL message is: + +`| 2 | 4-byte message length | UTF-8 message |` + +The format of a COMPLETE message is: + +`| 3 | 4-byte payload length | payload data |` + +### Process +Negotiation is initiated by a client sending a START command containing the client's chosen mechanism name and any mechanism-specific payload data. + +The server and client then interchange some number (possibly zero) of CONTINUE messages. Each message contains payload data that is processed by the security mechanism to generate the next message. + +Once either the client or server send a FAIL message then negotiation has failed. UTF-8-encoded text is included in the failure message. Once either a FAIL message has been sent or received, or any other error occurs in the negotiation, further communication on this connection must cease. + +Once either the client or server send a COMPLETE message then negotiation has completed successfully. Session data may now be transmitted over the connection until it is closed by either side. + +## Session Data +If no SASL QOP (quality of protection) is negotiated, then all subsequent writes to/reads over this connection are written/read unmodified. In particular, messages use Avro [framing](#Message+Framing), and are of the form: + +`| 4-byte frame length | frame data | ... | 4 zero bytes |` + +If a SASL QOP is negotiated, then it must be used by the connection for all subsequent messages. This is done by wrapping each non-empty frame written using the security mechanism and unwrapping each non-empty frame read. The length written in each non-empty frame is the length of the wrapped data. Complete frames must be passed to the security mechanism for unwrapping. Unwrapped data is then passed to the application as the content of the frame. + +If at any point processing fails due to wrapping, unwrapping or framing errors, then all further communication on this connection must cease. + +## Anonymous Mechanism +The SASL anonymous mechanism ([RFC 2245](https://www.ietf.org/rfc/rfc2222.txt)) is quite simple to implement. In particular, an initial anonymous request may be prefixed by the following static sequence: + +`| 0 | 0009 | ANONYMOUS | 0000 |` + +If a server uses the anonymous mechanism, it should check that the mechanism name in the start message prefixing the first request received is 'ANONYMOUS', then simply prefix its initial response with a COMPLETE message of: + +`| 3 | 0000 |` + +If an anonymous server recieves some other mechanism name, then it may respond with a FAIL message as simple as: + +`| 2 | 0000 |` + +Note that the anonymous mechanism need add no additional round-trip messages between client and server. The START message can be piggybacked on the initial request and the COMPLETE or FAIL message can be piggybacked on the initial response. diff --git a/doc/content/en/docs/1.11.3/Specification/_index.md b/doc/content/en/docs/1.11.3/Specification/_index.md new file mode 100755 index 00000000000..7cc5a17547e --- /dev/null +++ b/doc/content/en/docs/1.11.3/Specification/_index.md @@ -0,0 +1,848 @@ +--- +title: "Specification" +linkTitle: "Specification" +weight: 4 +date: 2021-10-25 +aliases: +- spec.html +--- + + + +## Introduction +This document defines Apache Avro. It is intended to be the authoritative specification. Implementations of Avro must adhere to this document. + +## Schema Declaration {#schema-declaration} +A Schema is represented in [JSON](https://www.json.org/) by one of: + +* A JSON string, naming a defined type. +* A JSON object, of the form: +```js +{"type": "typeName", ...attributes...} +``` +where _typeName_ is either a primitive or derived type name, as defined below. Attributes not defined in this document are permitted as metadata, but must not affect the format of serialized data. +* A JSON array, representing a union of embedded types. + +## Primitive Types +The set of primitive type names is: + +* _null_: no value +* _boolean_: a binary value +* _int_: 32-bit signed integer +* _long_: 64-bit signed integer +* _float_: single precision (32-bit) IEEE 754 floating-point number +* _double_: double precision (64-bit) IEEE 754 floating-point number +* _bytes_: sequence of 8-bit unsigned bytes +* _string_: unicode character sequence + +Primitive types have no specified attributes. + +Primitive type names are also defined type names. Thus, for example, the schema "string" is equivalent to: +```json +{"type": "string"} +``` + +## Complex Types +Avro supports six kinds of complex types: _records_, _enums_, _arrays_, _maps_, _unions_ and _fixed_. + +### Records {#schema-record} +Records use the type name "record" and support the following attributes: + +* _name_: a JSON string providing the name of the record (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _aliases_: a JSON array of strings, providing alternate names for this record (optional). +* _fields_: a JSON array, listing fields (required). Each field is a JSON object with the following attributes: + * _name_: a JSON string providing the name of the field (required), and + * _doc_: a JSON string describing this field for users (optional). + * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above + * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. + * _aliases_: a JSON array of strings, providing alternate names for this field (optional). + * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. + +*field default values* + +| **avro type** | **json type** | **example** | +|---------------|----------------|-------------| +| null | null | `null` | +| boolean | boolean | `true` | +| int,long | integer | `1` | +| float,double | number | `1.1` | +| bytes | string | `"\u00FF"` | +| string | string | `"foo"` | +| record | object | `{"a": 1}` | +| enum | string | `"FOO"` | +| array | array | `[1]` | +| map | object | `{"a": 1}` | +| fixed | string | `"\u00ff"` | + +For example, a linked-list of 64-bit values may be defined with: +```jsonc +{ + "type": "record", + "name": "LongList", + "aliases": ["LinkedLongs"], // old name for this + "fields" : [ + {"name": "value", "type": "long"}, // each element has a long + {"name": "next", "type": ["null", "LongList"]} // optional next element + ] +} +``` + +### Enums +Enums use the type name "enum" and support the following attributes: + +* _name_: a JSON string providing the name of the enum (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _symbols_: a JSON array, listing symbols, as JSON strings (required). All symbols in an enum must be unique; duplicates are prohibited. Every symbol must match the regular expression [A-Za-z_][A-Za-z0-9_]* (the same requirement as for [names]({{< ref "#names" >}} "Names")). +* _default_: A default value for this enumeration, used during resolution when the reader encounters a symbol from the writer that isn't defined in the reader's schema (optional). The value provided here must be a JSON string that's a member of the symbols array. See documentation on schema resolution for how this gets used. + +For example, playing card suits might be defined with: +```json +{ + "type": "enum", + "name": "Suit", + "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"] +} +``` + +### Arrays +Arrays use the type name "array" and support a single attribute: + +* _items_: the schema of the array's items. + +For example, an array of strings is declared with: +```json +{ + "type": "array", + "items" : "string", + "default": [] +} +``` + +### Maps +Maps use the type name "map" and support one attribute: + +* _values_: the schema of the map's values. + +Map keys are assumed to be strings. + +For example, a map from string to long is declared with: +```json +{ + "type": "map", + "values" : "long", + "default": {} +} +``` + +### Unions +Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string. + +(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match the first element of the union. Thus, for unions containing "null", the "null" is usually listed first, since the default value of such unions is typically null.) + +Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.) + +Unions may not immediately contain other unions. + +### Fixed +Fixed uses the type name "fixed" and supports the following attributes: + +* _name_: a string naming this fixed (required). +* _namespace_, a string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _size_: an integer, specifying the number of bytes per value (required). + +For example, 16-byte quantity may be declared with: +```json +{"type": "fixed", "size": 16, "name": "md5"} +``` + +### Names {#names} +Record, enums and fixed are named types. Each has a fullname that is composed of two parts; a name and a namespace, separated by a dot. Equality of names is defined on the fullname. + +Record fields and enum symbols have names as well (but no namespace). Equality of fields and enum symbols is defined on the name of the field/symbol within its scope (the record/enum that defines it). Fields and enum symbols across scopes are never equal. + +The name portion of the fullname of named types, record field names, and enum symbols must: + +* start with [A-Za-z_] +* subsequently contain only [A-Za-z0-9_] + +A namespace is a dot-separated sequence of such names. The empty string may also be used as a namespace to indicate the null namespace. Equality of names (including field names and enum symbols) as well as fullnames is case-sensitive. + +The null namespace may not be used in a dot-separated sequence of names. So the grammar for a namespace is: +``` + | [()*] +``` + +In record, enum and fixed definitions, the fullname is determined according to the algorithm below the example: + +``` +{ + "type": "record", + "name": "Example", + "doc": "A simple name (attribute) and no namespace attribute: use the null namespace (\"\"); the fullname is 'Example'.", + "fields": [ + { + "name": "inheritNull", + "type": { + "type": "enum", + "name": "Simple", + "doc": "A simple name (attribute) and no namespace attribute: inherit the null namespace of the enclosing type 'Example'. The fullname is 'Simple'.", + "symbols": ["a", "b"] + } + }, { + "name": "explicitNamespace", + "type": { + "type": "fixed", + "name": "Simple", + "namespace": "explicit", + "doc": "A simple name (attribute) and a namespace (attribute); the fullname is 'explicit.Simple' (this is a different type than of the 'inheritNull' field).", + "size": 12 + } + }, { + "name": "fullName", + "type": { + "type": "record", + "name": "a.full.Name", + "namespace": "ignored", + "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", + "fields": [ + { + "name": "inheritNamespace", + "type": { + "type": "enum", + "name": "Understanding", + "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.", + "symbols": ["d", "e"] + } + } + ] + } + } + ] +} +``` + +The fullname of a record, enum or fixed definition is determined by the required `name` and optional `namespace` attributes like this: + +* A fullname is specified. If the name specified contains a dot, then it is assumed to be a fullname, and any namespace also specified is ignored. For example, use "name": "org.foo.X" to indicate the fullname org.foo.X. +* A simple name (a name that contains no dots) and namespace are both specified. For example, one might use "name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X. +* A simple name only is specified (a name that contains no dots). In this case the namespace is taken from the most tightly enclosing named schema or protocol, and the fullname is constructed from that namespace and the name. For example, if "name": "X" is specified, and this occurs within a field of the record definition of org.foo.Y, then the fullname is org.foo.X. This also happens if there is no enclosing namespace (i.e., the enclosing schema definition has the null namespace). + +References to previously defined names are as in the latter two cases above: if they contain a dot they are a fullname, if they do not contain a dot, the namespace is the namespace of the enclosing definition. + +Primitive type names (`null`, `boolean`, `int`, `long`, `float`, `double`, `bytes`, `string`) have no namespace and their names may not be defined in any namespace. + +Complex types (`record`, `enum`, `array`, `map`, `fixed`) have no namespace, but their names (as well as `union`) are permitted to be reused as type names. This can be confusing to the human reader, but is always unambiguous for binary serialization. Due to the limitations of JSON encoding, it is a best practice to use a namespace when using these names. + +A schema or protocol may not contain multiple definitions of a fullname. Further, a name must be defined before it is used ("before" in the depth-first, left-to-right traversal of the JSON parse tree, where the types attribute of a protocol is always deemed to come "before" the messages attribute.) + +### Aliases +Named types and fields may have aliases. An implementation may optionally use aliases to map a writer's schema to the reader's. This facilitates both schema evolution as well as processing disparate datasets. + +Aliases function by re-writing the writer's schema using aliases from the reader's schema. For example, if the writer's schema was named "Foo" and the reader's schema is named "Bar" and has an alias of "Foo", then the implementation would act as though "Foo" were named "Bar" when reading. Similarly, if data was written as a record with a field named "x" and is read as a record with a field named "y" with alias "x", then the implementation would act as though "x" were named "y" when reading. + +A type alias may be specified either as a fully namespace-qualified, or relative to the namespace of the name it is an alias for. For example, if a type named "a.b" has aliases of "c" and "x.y", then the fully qualified names of its aliases are "a.c" and "x.y". + +## Data Serialization and Deserialization +Binary encoded Avro data does not include type information or field names. The benefit is that the serialized data is small, but as a result a schema must always be used in order to read Avro data correctly. The best way to ensure that the schema is structurally identical to the one used to write the data is to use the exact same schema. + +Therefore, files or systems that store Avro data should always include the writer's schema for that data. Avro-based remote procedure call (RPC) systems must also guarantee that remote recipients of data have a copy of the schema used to write that data. In general, it is advisable that any reader of Avro data should use a schema that is the same (as defined more fully in [Parsing Canonical Form for Schemas]({{< ref "#parsing-canonical-form-for-schemas" >}} "Parsing Canonical Form for Schemas")) as the schema that was used to write the data in order to deserialize it correctly. Deserializing data into a newer schema is accomplished by specifying an additional schema, the results of which are described in [Schema Resolution]({{< ref "#schema-resolution" >}}). + +In general, both serialization and deserialization proceed as a depth-first, left-to-right traversal of the schema, serializing or deserializing primitive types as they are encountered. Therefore, it is possible, though not advisable, to read Avro data with a schema that does not have the same Parsing Canonical Form as the schema with which the data was written. In order for this to work, the serialized primitive values must be compatible, in order value by value, with the items in the deserialization schema. For example, int and long are always serialized the same way, so an int could be deserialized as a long. Since the compatibility of two schemas depends on both the data and the serialization format (eg. binary is more permissive than JSON because JSON includes field names, eg. a long that is too large will overflow an int), it is simpler and more reliable to use schemas with identical Parsing Canonical Form. + +### Encodings +Avro specifies two serialization encodings: binary and JSON. Most applications will use the binary encoding, as it is smaller and faster. But, for debugging and web-based applications, the JSON encoding may sometimes be appropriate. + +### Binary Encoding {#binary-encoding} +Binary encoding does not include field names, self-contained information about the types of individual bytes, nor field or record separators. Therefore readers are wholly reliant on the schema used when the data was encoded. + +#### Primitive Types +Primitive types are encoded in binary as follows: + +* _null_ is written as zero bytes. +* a _boolean_ is written as a single byte whose value is either 0 (false) or 1 (true). +* _int_ and _long_ values are written using [variable-length](https://lucene.apache.org/java/3_5_0/fileformats.html#VInt) [zig-zag](https://code.google.com/apis/protocolbuffers/docs/encoding.html#types) coding. Some examples: + +| *value* | *hex* | +|---|---| +| 0 | 00 | +|-1 | 01 | +| 1 | 02 | +|-2 | 03 | +| 2 | 04 | +|...|...| +|-64 | 7f | +|64 | 80 01| +|...|...| + +* a _float_ is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java's [floatToIntBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Float.html#floatToIntBits-float-) and then encoded in little-endian format. +* a _double_ is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java's [doubleToLongBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#doubleToLongBits-double-) and then encoded in little-endian format. +* _bytes_ are encoded as a long followed by that many bytes of data. +* a _string_ is encoded as a long followed by that many bytes of UTF-8 encoded character data. +For example, the three-character string "foo" would be encoded as the long value 3 (encoded as hex 06) followed by the UTF-8 encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f): +``` +06 66 6f 6f +``` + +### Complex Types +Complex types are encoded in binary as follows: + +#### Records +A record is encoded by encoding the values of its fields in the order that they are declared. In other words, a record is encoded as just the concatenation of the encodings of its fields. Field values are encoded per their schema. + +For example, the record schema +```json +{ + "type": "record", + "name": "test", + "fields" : [ + {"name": "a", "type": "long"}, + {"name": "b", "type": "string"} + ] +} +``` + +An instance of this record whose a field has value 27 (encoded as hex 36) and whose b field has value "foo" (encoded as hex bytes 06 66 6f 6f), would be encoded simply as the concatenation of these, namely the hex byte sequence: +``` +36 06 66 6f 6f +``` + +#### Enums +An enum is encoded by a int, representing the zero-based position of the symbol in the schema. + +For example, consider the enum: +```json +{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] } +``` + +This would be encoded by an int between zero and three, with zero indicating "A", and 3 indicating "D". + +#### Arrays +Arrays are encoded as a series of blocks. Each block consists of a long count value, followed by that many array items. A block with count zero indicates the end of the array. Each item is encoded per the array's item schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a long block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +For example, the array schema +```json +{"type": "array", "items": "long"} +``` +an array containing the items 3 and 27 could be encoded as the long value 2 (encoded as hex 04) followed by long values 3 and 27 (encoded as hex 06 36) terminated by zero: +``` +04 06 36 00 +``` + +The blocked representation permits one to read and write arrays larger than can be buffered in memory, since one can start writing items without knowing the full length of the array. + +#### Maps {#schema-maps} +Maps are encoded as a series of _blocks_. Each block consists of a `long` _count_ value, followed by that many key/value pairs. A block with count zero indicates the end of the map. Each item is encoded per the map's value schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a `long` block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +The blocked representation permits one to read and write maps larger than can be buffered in memory, since one can start writing items without knowing the full length of the map. + +#### Unions +A union is encoded by first writing an `int` value indicating the zero-based position within the union of the schema of its value. The value is then encoded per the indicated schema within the union. + +For example, the union schema `["null","string"]` would encode: + +* _null_ as zero (the index of "null" in the union): +`00` +* the string "a" as one (the index of "string" in the union, 1, encoded as hex 02), followed by the serialized string: +`02 02 61` +NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. In reality, we don't expect unions with 215M members + +#### Fixed +Fixed instances are encoded using the number of bytes declared in the schema. + +### JSON Encoding +Except for unions, the JSON encoding is the same as is used to encode [field default values]({{< ref "#schema-record" >}}). + +The value of a union is encoded in JSON as follows: + +* if its type is _null_, then it is encoded as a JSON _null_; +* otherwise it is encoded as a JSON object with one name/value pair whose name is the type's name and whose value is the recursively encoded value. For Avro's named types (record, fixed or enum) the user-specified name is used, for other types the type name is used. + +For example, the union schema `["null","string","Foo"]`, where Foo is a record name, would encode: + +* _null_ as _null_; +* the string "a" as `{"string": "a"}` and +* a Foo instance as `{"Foo": {...}}`, where `{...}` indicates the JSON encoding of a Foo instance. + +Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc. + +### Single-object encoding +In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic. + +In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively. + +#### Single object encoding specification +Single Avro objects are encoded as follows: + +1. A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1). +1. The 8-byte little-endian CRC-64-AVRO [fingerprint]({{< ref "#schema-fingerprints" >}} "Schema fingerprints") of the object's schema. +1. The Avro object encoded using [Avro's binary encoding]({{< ref "#binary-encoding" >}}). + +Implementations use the 2-byte marker to determine whether a payload is Avro. This check helps avoid expensive lookups that resolve the schema from a fingerprint, when the message is not an encoded Avro payload. + +## Sort Order +Avro defines a standard sort order for data. This permits data written by one system to be efficiently sorted by another system. This can be an important optimization, as sort order comparisons are sometimes the most frequent per-object operation. Note also that Avro binary-encoded data can be efficiently ordered without deserializing it to objects. + +Data items may only be compared if they have identical schemas. Pairwise comparisons are implemented recursively with a depth-first, left-to-right traversal of the schema. The first mismatch encountered determines the order of the items. + +Two items with the same schema are compared according to the following rules. + +* _null_ data is always equal. +* _boolean_ data is ordered with false before true. +* _int_, _long_, _float_ and _double_ data is ordered by ascending numeric value. +* _bytes_ and fixed data are compared lexicographically by unsigned 8-bit values. +* _string_ data is compared lexicographically by Unicode code point. Note that since UTF-8 is used as the binary encoding for strings, sorting of bytes and string binary data is identical. +* _array_ data is compared lexicographically by element. +* _enum_ data is ordered by the symbol's position in the enum schema. For example, an enum whose symbols are `["z", "a"]` would sort "z" values before "a" values. +* _union_ data is first ordered by the branch within the union, and, within that, by the type of the branch. For example, an `["int", "string"]` union would order all int values before all string values, with the ints and strings themselves ordered as defined above. +* _record_ data is ordered lexicographically by field. If a field specifies that its order is: + * "ascending", then the order of its values is unaltered. + * "descending", then the order of its values is reversed. + * "ignore", then its values are ignored when sorting. +* _map_ data may not be compared. It is an error to attempt to compare data containing maps unless those maps are in an `"order":"ignore"` record field. + +## Object Container Files +Avro includes a simple object container file format. A file has a schema, and all objects stored in the file must be written according to that schema, using binary encoding. Objects are stored in blocks that may be compressed. Syncronization markers are used between blocks to permit efficient splitting of files for MapReduce processing. + +Files may include arbitrary user-specified metadata. + +A file consists of: + +* A file header, followed by +* one or more file data blocks. + +A file header consists of: + +* Four bytes, ASCII 'O', 'b', 'j', followed by 1. +* file metadata, including the schema. +* The 16-byte, randomly-generated sync marker for this file. + +File metadata is written as if defined by the following [map]({{< ref "#schema-maps" >}}) schema: +```json +{"type": "map", "values": "bytes"} +``` +All metadata properties that start with "avro." are reserved. The following file metadata properties are currently used: + +* **avro.schema** contains the schema of objects stored in the file, as JSON data (required). +* **avro.codec** the name of the compression codec used to compress blocks, as a string. Implementations are required to support the following codecs: "null" and "deflate". If codec is absent, it is assumed to be "null". The codecs are described with more detail below. + +A file header is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.Header", + "fields" : [ + {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, + {"name": "meta", "type": {"type": "map", "values": "bytes"}}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +A file data block consists of: + +* A long indicating the count of objects in this block. +* A long indicating the size in bytes of the serialized objects in the current block, after any codec is applied +* The serialized objects. If a codec is specified, this is compressed by that codec. +* The file's 16-byte sync marker. + +A file data block is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.DataBlock", + "fields" : [ + {"name": "count", "type": "long"}, + {"name": "data", "type": "bytes"}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +Each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enable detection of corrupt blocks and help ensure data integrity. + +### Required Codecs + +_null_ + +The "null" codec simply passes through data uncompressed. + +_deflate_ + +The "deflate" codec writes the data block using the deflate algorithm as specified in [RFC 1951](https://www.isi.edu/in-notes/rfc1951.txt), and typically implemented using the zlib library. Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum. + +### Optional Codecs +_bzip2_ + +The "bzip2" codec uses the [bzip2](https://sourceware.org/bzip2/) compression library. + +_snappy_ + +The "snappy" codec uses Google's [Snappy](https://code.google.com/p/snappy/) compression library. Each compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in the block. + +_xz_ + +The "xz" codec uses the [XZ](https://tukaani.org/xz/) compression library. + +_zstandard_ + +The "zstandard" codec uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. + +### Protocol Declaration +Avro protocols describe RPC interfaces. Like schemas, they are defined with JSON text. + +A protocol is a JSON object with the following attributes: + +* _protocol_, a string, the name of the protocol (required); +* _namespace_, an optional string that qualifies the name (optional); +* _doc_, an optional string describing this protocol; +* _types_, an optional list of definitions of named types (records, enums, fixed and errors). An error definition is just like a record definition except it uses "error" instead of "record". Note that forward references to named types are not permitted. +* _messages_, an optional JSON object whose keys are message names and whose values are objects whose attributes are described below. No two messages may have the same name. + +The name and namespace qualification rules defined for schema objects apply to protocols as well. + +### Messages +A message has attributes: + +* a _doc_, an optional description of the message, +* a _request_, a list of named, typed parameter schemas (this has the same form as the fields of a record declaration); +* a _response_ schema; +* an optional union of declared error schemas. The effective union has "string" prepended to the declared union, to permit transmission of undeclared "system" errors. For example, if the declared error union is `["AccessError"]`, then the effective union is `["string", "AccessError"]`. When no errors are declared, the effective error union is `["string"]`. Errors are serialized using the effective union; however, a protocol's JSON declaration contains only the declared union. +* an optional one-way boolean parameter. + +A request parameter list is processed equivalently to an anonymous record. Since record field lists may vary between reader and writer, request parameters may also differ between the caller and responder, and such differences are resolved in the same manner as record field differences. + +The one-way parameter may only be true when the response type is `"null"` and no errors are listed. + +### Sample Protocol +For example, one may define a simple HelloWorld protocol with: +```json +{ + "namespace": "com.acme", + "protocol": "HelloWorld", + "doc": "Protocol Greetings", + + "types": [ + {"name": "Greeting", "type": "record", "fields": [ + {"name": "message", "type": "string"}]}, + {"name": "Curse", "type": "error", "fields": [ + {"name": "message", "type": "string"}]} + ], + + "messages": { + "hello": { + "doc": "Say hello.", + "request": [{"name": "greeting", "type": "Greeting" }], + "response": "Greeting", + "errors": ["Curse"] + } + } +} +``` + +## Protocol Wire Format + +### Message Transport +Messages may be transmitted via different transport mechanisms. + +To the transport, a _message_ is an opaque byte sequence. + +A transport is a system that supports: + +* **transmission of request messages** +* **receipt of corresponding response messages** +Servers may send a response message back to the client corresponding to a request message. The mechanism of correspondence is transport-specific. For example, in HTTP it is implicit, since HTTP directly supports requests and responses. But a transport that multiplexes many client threads over a single socket would need to tag messages with unique identifiers. + +Transports may be either stateless or stateful. In a stateless transport, messaging assumes no established connection state, while stateful transports establish connections that may be used for multiple messages. This distinction is discussed further in the [handshake](#handshake) section below. + +#### HTTP as Transport +When [HTTP](https://www.w3.org/Protocols/rfc2616/rfc2616.html) is used as a transport, each Avro message exchange is an HTTP request/response pair. All messages of an Avro protocol should share a single URL at an HTTP server. Other protocols may also use that URL. Both normal and error Avro response messages should use the 200 (OK) response code. The chunked encoding may be used for requests and responses, but, regardless the Avro request and response are the entire content of an HTTP request and response. The HTTP Content-Type of requests and responses should be specified as "avro/binary". Requests should be made using the POST method. + +HTTP is used by Avro as a stateless transport. + +### Message Framing +Avro messages are _framed_ as a list of buffers. + +Framing is a layer between messages and the transport. It exists to optimize certain operations. + +The format of framed message data is: + +* a series of buffers, where each buffer consists of: + * a four-byte, big-endian _buffer length_, followed by + * that many bytes of _buffer_ data. +* a message is always terminated by a zero-length buffer. + +Framing is transparent to request and response message formats (described below). Any message may be presented as a single or multiple buffers. + +Framing can permit readers to more efficiently get different buffers from different sources and for writers to more efficiently store different buffers to different destinations. In particular, it can reduce the number of times large binary objects are copied. For example, if an RPC parameter consists of a megabyte of file data, that data can be copied directly to a socket from a file descriptor, and, on the other end, it could be written directly to a file descriptor, never entering user space. + +A simple, recommended, framing policy is for writers to create a new segment whenever a single binary object is written that is larger than a normal output buffer. Small objects are then appended in buffers, while larger objects are written as their own buffers. When a reader then tries to read a large object the runtime can hand it an entire buffer directly, without having to copy it. + +### Handshake +The purpose of the handshake is to ensure that the client and the server have each other's protocol definition, so that the client can correctly deserialize responses, and the server can correctly deserialize requests. Both clients and servers should maintain a cache of recently seen protocols, so that, in most cases, a handshake will be completed without extra round-trip network exchanges or the transmission of full protocol text. + +RPC requests and responses may not be processed until a handshake has been completed. With a stateless transport, all requests and responses are prefixed by handshakes. With a stateful transport, handshakes are only attached to requests and responses until a successful handshake response has been returned over a connection. After this, request and response payloads are sent without handshakes for the lifetime of that connection. + +The handshake process uses the following record schemas: +```json +{ + "type": "record", + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +{ + "type": "record", + "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", + "type": {"type": "enum", "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", + "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +``` + +* A client first prefixes each request with a `HandshakeRequest` containing just the hash of its protocol and of the server's protocol (`clientHash!=null, clientProtocol=null, serverHash!=null`), where the hashes are 128-bit MD5 hashes of the JSON protocol text. If a client has never connected to a given server, it sends its hash as a guess of the server's hash, otherwise it sends the hash that it previously obtained from this server. +The server responds with a HandshakeResponse containing one of: + * `match=BOTH, serverProtocol=null, serverHash=null` if the client sent the valid hash of the server's protocol and the server knows what protocol corresponds to the client's hash. In this case, the request is complete and the response data immediately follows the HandshakeResponse. + * `match=CLIENT, serverProtocol!=null, serverHash!=null` if the server has previously seen the client's protocol, but the client sent an incorrect hash of the server's protocol. The request is complete and the response data immediately follows the HandshakeResponse. The client must use the returned protocol to process the response and should also cache that protocol and its hash for future interactions with this server. + * `match=NONE` if the server has not previously seen the client's protocol. The serverHash and serverProtocol may also be non-null if the server's protocol hash was incorrect. +In this case the client must then re-submit its request with its protocol text (`clientHash!=null, clientProtocol!=null, serverHash!=null`) and the server should respond with a successful match (match=BOTH, serverProtocol=null, serverHash=null) as above. + +The meta field is reserved for future handshake enhancements. + +### Call Format +A _call_ consists of a request message paired with its resulting response or error message. Requests and responses contain extensible metadata, and both kinds of messages are framed as described above. + +The format of a call request is: + +* _request metadata_, a map with values of type bytes +* the _message name_, an Avro string, followed by +* the _message parameters_. Parameters are serialized according to the message's request declaration. +When the empty string is used as a message name a server should ignore the parameters and return an empty response. A client may use this to ping a server or to perform a handshake without sending a protocol message. + +When a message is declared one-way and a stateful connection has been established by a successful handshake response, no response data is sent. Otherwise the format of the call response is: + +* _response metadata_, a map with values of type bytes +* a one-byte error _flag_ boolean, followed by either: + * if the error flag is false, the message _response_, serialized per the message's response schema. + * if the error flag is true, the _error_, serialized per the message's effective error union schema. + +### Schema Resolution {#schema-resolution} +A reader of Avro data, whether from an RPC or a file, can always parse that data because the original schema must be provided along with the data. However, the reader may be programmed to read data into a different schema. For example, if the data was written with a different version of the software than it is read, then fields may have been added or removed from records. This section specifies how such schema differences should be resolved. + +We refer to the schema used to write the data as the writer's schema, and the schema that the application expects the reader's schema. Differences between these should be resolved as follows: + +* It is an error if the two schemas do not _match_. +To match, one of the following must hold: + * both schemas are arrays whose item types match + * both schemas are maps whose value types match + * both schemas are enums whose (unqualified) names match + * both schemas are fixed whose sizes and (unqualified) names match + * both schemas are records with the same (unqualified) name + * either schema is a union + * both schemas have same primitive type + * the writer's schema may be promoted to the reader's as follows: + * int is promotable to long, float, or double + * long is promotable to float or double + * float is promotable to double + * string is promotable to bytes + * bytes is promotable to string +* **if both are records**: + * the ordering of fields may be different: fields are matched by name. + * schemas for fields with the same name in both records are resolved recursively. + * if the writer's record contains a field with a name not present in the reader's record, the writer's value for that field is ignored. + * if the reader's record schema has a field that contains a default value, and writer's schema does not have a field with the same name, then the reader should use the default value from its field. + * if the reader's record schema has a field with no default value, and writer's schema does not have a field with the same name, an error is signalled. +* **if both are enums**: +if the writer's symbol is not present in the reader's enum and the reader has a default value, then that value is used, otherwise an error is signalled. + +* **if both are arrays**: +This resolution algorithm is applied recursively to the reader's and writer's array item schemas. + +* **if both are maps**: +This resolution algorithm is applied recursively to the reader's and writer's value schemas. + +* **if both are unions**: +The first schema in the reader's union that matches the selected writer's union schema is recursively resolved against it. if none match, an error is signalled. + +* **if reader's is a union, but writer's is not** +The first schema in the reader's union that matches the writer's schema is recursively resolved against it. If none match, an error is signalled. + +* **if writer's is a union, but reader's is not** +If the reader's schema matches the selected writer's schema, it is recursively resolved against it. If they do not match, an error is signalled. + +A schema's _doc_ fields are ignored for the purposes of schema resolution. Hence, the _doc_ portion of a schema may be dropped at serialization. + +### Parsing Canonical Form for Schemas {#parsing-canonical-form-for-schemas} +One of the defining characteristics of Avro is that a reader must use the schema used by the writer of the data in order to know how to read the data. This assumption results in a data format that's compact and also amenable to many forms of schema evolution. However, the specification so far has not defined what it means for the reader to have the "same" schema as the writer. Does the schema need to be textually identical? Well, clearly adding or removing some whitespace to a JSON expression does not change its meaning. At the same time, reordering the fields of records clearly does change the meaning. So what does it mean for a reader to have "the same" schema as a writer? + +Parsing Canonical Form is a transformation of a writer's schema that let's us define what it means for two schemas to be "the same" for the purpose of reading data written against the schema. It is called Parsing Canonical Form because the transformations strip away parts of the schema, like "doc" attributes, that are irrelevant to readers trying to parse incoming data. It is called Canonical Form because the transformations normalize the JSON text (such as the order of attributes) in a way that eliminates unimportant differences between schemas. If the Parsing Canonical Forms of two different schemas are textually equal, then those schemas are "the same" as far as any reader is concerned, i.e., there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writing using the other original schema. (We sketch a proof of this property in a companion document.) + +The next subsection specifies the transformations that define Parsing Canonical Form. But with a well-defined canonical form, it can be convenient to go one step further, transforming these canonical forms into simple integers ("fingerprints") that can be used to uniquely identify schemas. The subsection after next recommends some standard practices for generating such fingerprints. + +#### Transforming into Parsing Canonical Form +Assuming an input schema (in JSON form) that's already UTF-8 text for a _valid_ Avro schema (including all quotes as required by JSON), the following transformations will produce its Parsing Canonical Form: + +* [PRIMITIVES] Convert primitive schemas to their simple form (e.g., int instead of `{"type":"int"}`). +* [FULLNAMES] Replace short names with fullnames, using applicable namespaces to do so. Then eliminate namespace attributes, which are now redundant. +* [STRIP] Keep only attributes that are relevant to parsing data, which are: _type_, _name_, _fields_, _symbols_, _items_, _values_, _size_. Strip all others (e.g., _doc_ and _aliases_). +* [ORDER] Order the appearance of fields of JSON objects as follows: _name_, _type_, _fields_, _symbols_, _items_, _values_, _size_. For example, if an object has _type_, _name_, and _size_ fields, then the _name_ field should appear first, followed by the _type_ and then the _size_ fields. +* [STRINGS] For all JSON string literals in the schema text, replace any escaped characters (e.g., \uXXXX escapes) with their UTF-8 equivalents. +* [INTEGERS] Eliminate quotes around and any leading zeros in front of JSON integer literals (which appear in the _size_ attributes of _fixed_ schemas). +* [WHITESPACE] Eliminate all whitespace in JSON outside of string literals. + +#### Schema Fingerprints {#schema-fingerprints} +"[A] fingerprinting algorithm is a procedure that maps an arbitrarily large data item (such as a computer file) to a much shorter bit string, its fingerprint, that uniquely identifies the original data for all practical purposes" (quoted from [Wikipedia](https://en.wikipedia.org/wiki/Fingerprint_(computing))). In the Avro context, fingerprints of Parsing Canonical Form can be useful in a number of applications; for example, to cache encoder and decoder objects, to tag data items with a short substitute for the writer's full schema, and to quickly negotiate common-case schemas between readers and writers. + +In designing fingerprinting algorithms, there is a fundamental trade-off between the length of the fingerprint and the probability of collisions. To help application designers find appropriate points within this trade-off space, while encouraging interoperability and ease of implementation, we recommend using one of the following three algorithms when fingerprinting Avro schemas: + +* When applications can tolerate longer fingerprints, we recommend using the [SHA-256 digest algorithm](https://en.wikipedia.org/wiki/SHA-2) to generate 256-bit fingerprints of Parsing Canonical Forms. Most languages today have SHA-256 implementations in their libraries. +* At the opposite extreme, the smallest fingerprint we recommend is a 64-bit [Rabin fingerprint](https://en.wikipedia.org/wiki/Rabin_fingerprint). Below, we provide pseudo-code for this algorithm that can be easily translated into any programming language. 64-bit fingerprints should guarantee uniqueness for schema caches of up to a million entries (for such a cache, the chance of a collision is 3E-8). We don't recommend shorter fingerprints, as the chances of collisions is too great (for example, with 32-bit fingerprints, a cache with as few as 100,000 schemas has a 50% chance of having a collision). +* Between these two extremes, we recommend using the [MD5 message digest](https://en.wikipedia.org/wiki/MD5) to generate 128-bit fingerprints. These make sense only where very large numbers of schemas are being manipulated (tens of millions); otherwise, 64-bit fingerprints should be sufficient. As with SHA-256, MD5 implementations are found in most libraries today. + +These fingerprints are not meant to provide any security guarantees, even the longer SHA-256-based ones. Most Avro applications should be surrounded by security measures that prevent attackers from writing random data and otherwise interfering with the consumers of schemas. We recommend that these surrounding mechanisms be used to prevent collision and pre-image attacks (i.e., "forgery") on schema fingerprints, rather than relying on the security properties of the fingerprints themselves. + +Rabin fingerprints are [cyclic redundancy checks](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) computed using irreducible polynomials. In the style of the Appendix of [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt) (pg 10), which defines the CRC-32 algorithm, here's our definition of the 64-bit AVRO fingerprinting algorithm: +```java +long fingerprint64(byte[] buf) { + if (FP_TABLE == null) initFPTable(); + long fp = EMPTY; + for (int i = 0; i < buf.length; i++) + fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff]; + return fp; +} + +static long EMPTY = 0xc15d213aa4d7a795L; +static long[] FP_TABLE = null; + +void initFPTable() { + FP_TABLE = new long[256]; + for (int i = 0; i < 256; i++) { + long fp = i; + for (int j = 0; j < 8; j++) + fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L)); + FP_TABLE[i] = fp; + } +} +``` + +Readers interested in the mathematics behind this algorithm may want to read [Chapter 14 of the Second Edition of Hacker's Delight](https://books.google.com/books?id=XD9iAwAAQBAJ&pg=PA319). (Unlike RFC-1952 and the book chapter, we prepend a single one bit to messages. We do this because CRCs ignore leading zero bits, which can be problematic. Our code prepends a one-bit by initializing fingerprints using EMPTY, rather than initializing using zero as in RFC-1952 and the book chapter.) + +## Logical Types +A logical type is an Avro primitive or complex type with extra attributes to represent a derived type. The attribute `logicalType` must always be present for a logical type, and is a string with the name of one of the logical types listed later in this section. Other attributes may be defined for particular logical types. + +A logical type is always serialized using its underlying Avro type so that values are encoded in exactly the same way as the equivalent Avro type that does not have a `logicalType` attribute. Language implementations may choose to represent logical types with an appropriate native type, although this is not required. + +Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type. + +### Decimal +The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled × 10-scale_. + +A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute. + +The following attributes are supported: + +* _scale_, a JSON integer representing the scale (optional). If not specified the scale is 0. +* _precision_, a JSON integer representing the (maximum) precision of decimals stored in this type (required). +For example, the following schema represents decimal numbers with a maximum precision of 4 and a scale of 2: +```json +{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 +} +``` +Precision must be a positive integer greater than zero. If the underlying type is a _fixed_, then the precision is limited by its size. An array of length n can store at most _floor(log10(28 × n - 1 - 1))_ base-10 digits of precision. + +Scale must be zero or a positive integer less than or equal to the precision. + +For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match. + +### UUID +The `uuid` logical type represents a random generated universally unique identifier (UUID). + +A `uuid` logical type annotates an Avro `string`. The string has to conform with [RFC-4122](https://www.ietf.org/rfc/rfc4122.txt) + +### Date +The `date` logical type represents a date within the calendar, with no reference to a particular time zone or time of day. + +A `date` logical type annotates an Avro `int`, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar). + +The following schema represents a date: +```json +{ + "type": "int", + "logicalType": "date" +} +``` + +### Time (millisecond precision) +The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. + +A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000. + +### Time (microsecond precision) +The `time-micros` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond. + +A `time-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds after midnight, 00:00:00.000000. + +### Timestamp (millisecond precision) +The `timestamp-millis` logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond. Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +A `timestamp-millis` logical type annotates an Avro `long`, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC. + +### Timestamp (microsecond precision) +The `timestamp-micros` logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one microsecond. Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +A `timestamp-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC. + +### Local timestamp (millisecond precision) +The `local-timestamp-millis` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one millisecond. + +A `local-timestamp-millis` logical type annotates an Avro `long`, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. + +### Local timestamp (microsecond precision) +The `local-timestamp-micros` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one microsecond. + +A `local-timestamp-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds, from 1 January 1970 00:00:00.000000. + +### Duration +The `duration` logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. + +A `duration` logical type annotates Avro `fixed` type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds. diff --git a/doc/content/en/docs/1.11.3/_index.md b/doc/content/en/docs/1.11.3/_index.md new file mode 100755 index 00000000000..55094abf161 --- /dev/null +++ b/doc/content/en/docs/1.11.3/_index.md @@ -0,0 +1,59 @@ +--- +title: "Apache Avroâ„ĸ 1.11.3 Documentation" +linkTitle: "1.11.3" +type: docs +weight: -1113 +--- + + + +## Introduction + +Apache Avroâ„ĸ is a data serialization system. + +Avro provides: + +* Rich data structures. +* A compact, fast, binary data format. +* A container file, to store persistent data. +* Remote procedure call (RPC). +* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation as an optional optimization, only worth implementing for statically typed languages. + +## Schemas + +Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing. + +When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present. + +When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since both client and server both have the other's full schema, correspondence between same named fields, missing fields, extra fields, etc. can all be easily resolved. + +Avro schemas are defined with JSON . This facilitates implementation in languages that already have JSON libraries. + +## Comparison with other systems + +Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects. + +* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. +* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. +* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names. + + diff --git a/doc/content/en/docs/1.11.3/api-c++.md b/doc/content/en/docs/1.11.3/api-c++.md new file mode 100644 index 00000000000..f5cdb59514b --- /dev/null +++ b/doc/content/en/docs/1.11.3/api-c++.md @@ -0,0 +1,29 @@ +--- +title: "C++ API" +linkTitle: "C++ API" +weight: 102 +manualLink: /docs/1.11.3/api/cpp/html/ +--- + + + +The C++ API documentation can be found here. diff --git a/doc/content/en/docs/1.11.3/api-c.md b/doc/content/en/docs/1.11.3/api-c.md new file mode 100644 index 00000000000..89d71dac688 --- /dev/null +++ b/doc/content/en/docs/1.11.3/api-c.md @@ -0,0 +1,29 @@ +--- +title: "C API" +linkTitle: "C API" +weight: 101 +manualLink: /docs/1.11.3/api/c/ +--- + + + +The C API documentation can be found here. diff --git a/doc/content/en/docs/1.11.3/api-csharp.md b/doc/content/en/docs/1.11.3/api-csharp.md new file mode 100644 index 00000000000..1f052a327a7 --- /dev/null +++ b/doc/content/en/docs/1.11.3/api-csharp.md @@ -0,0 +1,29 @@ +--- +title: "C# API" +linkTitle: "C# API" +weight: 103 +manualLink: /docs/1.11.3/api/csharp/html/ +--- + + + +The C# API documentation can be found here. diff --git a/doc/content/en/docs/1.11.3/api-java.md b/doc/content/en/docs/1.11.3/api-java.md new file mode 100644 index 00000000000..92f57e26411 --- /dev/null +++ b/doc/content/en/docs/1.11.3/api-java.md @@ -0,0 +1,29 @@ +--- +title: "Java API" +linkTitle: "Java API" +weight: 100 +manualLink: /docs/1.11.3/api/java/ +--- + + + +The Javadocs can be found here. diff --git a/doc/content/en/docs/1.11.3/api-py.md b/doc/content/en/docs/1.11.3/api-py.md new file mode 100644 index 00000000000..82b5898932b --- /dev/null +++ b/doc/content/en/docs/1.11.3/api-py.md @@ -0,0 +1,29 @@ +--- +title: "Python API" +linkTitle: "Python API" +weight: 104 +manualLink: /docs/1.11.3/api/py/html/ +--- + + + +The Python API documentation can be found here. diff --git a/doc/content/en/docs/1.11.3/logo.svg b/doc/content/en/docs/1.11.3/logo.svg new file mode 100644 index 00000000000..b44ed197262 --- /dev/null +++ b/doc/content/en/docs/1.11.3/logo.svg @@ -0,0 +1,22 @@ + + + diff --git a/doc/content/en/docs/1.11.4/Getting started (Java)/_index.md b/doc/content/en/docs/1.11.4/Getting started (Java)/_index.md new file mode 100644 index 00000000000..a470a0291c8 --- /dev/null +++ b/doc/content/en/docs/1.11.4/Getting started (Java)/_index.md @@ -0,0 +1,289 @@ +--- +categories: [] +tags: ["java"] +title: "Getting Started (Java)" +linkTitle: "Getting Started (Java)" +weight: 2 +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Java. This guide only covers using Avro for data serialization; see Patrick Hunt's [Avro RPC Quick Start](https://github.com/phunt/avro-rpc-quickstart) for a good introduction to using Avro for RPC. + +## Download + +Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the [Apache Avroâ„ĸ Download]({{< relref "/project/download" >}}) page. This guide uses Avro 1.11.4, the latest version at the time of writing. For the examples in this guide, download avro-1.11.4.jar and avro-tools-1.11.4.jar. + +Alternatively, if you are using Maven, add the following dependency to your POM: + +```xml + + org.apache.avro + avro + 1.11.4 + +``` + +As well as the Avro Maven plugin (for performing code generation): + +```xml + + org.apache.avro + avro-maven-plugin + 1.11.4 + + + generate-sources + + schema + + + ${project.basedir}/src/main/avro/ + ${project.basedir}/src/main/java/ + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + +``` + +You may also build the required Avro jars from source. Building Avro is beyond the scope of this guide; see the Build Documentation page in the wiki for more information. + +## Defining a schema + +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing with code generation + +### Compiling the schema +Code generation allows us to automatically create classes based on our previously-defined schema. Once we have defined the relevant classes, there is no need to use the schema directly in our programs. We use the avro-tools jar to generate code as follows: + +```shell +java -jar /path/to/avro-tools-1.11.4.jar compile schema +``` + +This will generate the appropriate source files in a package based on the schema's namespace in the provided destination folder. For instance, to generate a User class in package example.avro from the schema defined above, run + +```shell +java -jar /path/to/avro-tools-1.11.4.jar compile schema user.avsc . +``` + +Note that if you using the Avro Maven plugin, there is no need to manually invoke the schema compiler; the plugin automatically performs code generation on any .avsc files present in the configured source directory. + +### Creating Users +Now that we've completed the code generation, let's create some Users, serialize them to a data file on disk, and then read back the file and deserialize the User objects. + +First let's create some Users and set their fields. + +```java +User user1 = new User(); +user1.setName("Alyssa"); +user1.setFavoriteNumber(256); +// Leave favorite color null + +// Alternate constructor +User user2 = new User("Ben", 7, "red"); + +// Construct via builder +User user3 = User.newBuilder() + .setName("Charlie") + .setFavoriteColor("blue") + .setFavoriteNumber(null) + .build(); +``` + +As shown in this example, Avro objects can be created either by invoking a constructor directly or by using a builder. Unlike constructors, builders will automatically set any default values specified in the schema. Additionally, builders validate the data as it set, whereas objects constructed directly will not cause an error until the object is serialized. However, using constructors directly generally offers better performance, as builders create a copy of the datastructure before it is written. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. Similarly, we set user3's favorite number to null (using a builder requires setting all fields, even if they are null). + +### Serializing +Now let's serialize our Users to disk. + +```java +// Serialize user1, user2 and user3 to disk +DatumWriter userDatumWriter = new SpecificDatumWriter(User.class); +DataFileWriter dataFileWriter = new DataFileWriter(userDatumWriter); +dataFileWriter.create(user1.getSchema(), new File("users.avro")); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.append(user3); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. The SpecificDatumWriter class is used with generated classes and extracts the schema from the specified generated type. + +Next we create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, let's deserialize the data file we just created. + +```java +// Deserialize Users from disk +DatumReader userDatumReader = new SpecificDatumReader(User.class); +DataFileReader dataFileReader = new DataFileReader(file, userDatumReader); +User user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +} +``` + +This snippet will output: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"} +``` + +Deserializing is very similar to serializing. We create a SpecificDatumReader, analogous to the SpecificDatumWriter we used in serialization, which converts in-memory serialized items into instances of our generated class, in this case User. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file and the schema provided by the reader, in this case the User class. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next we use the DataFileReader to iterate through the serialized Users and print the deserialized object to stdout. Note how we perform the iteration: we create a single User object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same User object rather than allocating a new User for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (User user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile # includes code generation via Avro Maven plugin +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain +``` + +### Beta feature: Generating faster code +In release 1.9.0, we introduced a new approach to generating code that speeds up decoding of objects by more than 10% and encoding by more than 30% (future performance enhancements are underway). To ensure a smooth introduction of this change into production systems, this feature is controlled by a feature flag, the system property org.apache.avro.specific.use_custom_coders. In this first release, this feature is off by default. To turn it on, set the system flag to true at runtime. In the sample above, for example, you could enable the fater coders as follows: + +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \ + -Dorg.apache.avro.specific.use_custom_coders=true + +Note that you do not have to recompile your Avro schema to have access to this feature. The feature is compiled and built into your code, and you turn it on and off at runtime using the feature flag. As a result, you can turn it on during testing, for example, and then off in production. Or you can turn it on in production, and quickly turn it off if something breaks. + +We encourage the Avro community to exercise this new feature early to help build confidence. (For those paying one-demand for compute resources in the cloud, it can lead to meaningful cost savings.) As confidence builds, we will turn this feature on by default, and eventually eliminate the feature flag (and the old code). + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. + +Let's go over the same example as in the previous section, but without using code generation: we'll create some users, serialize them to a data file on disk, and then read back the file and deserialize the users objects. + +### Creating users +First, we use a Parser to read our schema definition and create a Schema object. + +```java +Schema schema = new Schema.Parser().parse(new File("user.avsc")); +``` + +Using this schema, let's create some users. + +```java +GenericRecord user1 = new GenericData.Record(schema); +user1.put("name", "Alyssa"); +user1.put("favorite_number", 256); +// Leave favorite color null + +GenericRecord user2 = new GenericData.Record(schema); +user2.put("name", "Ben"); +user2.put("favorite_number", 7); +user2.put("favorite_color", "red"); +``` + +Since we're not using code generation, we use GenericRecords to represent users. GenericRecord uses the schema to verify that we only specify valid fields. If we try to set a non-existent field (e.g., user1.put("favorite_animal", "cat")), we'll get an AvroRuntimeException when we run the program. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. + +### Serializing +Now that we've created our user objects, serializing and deserializing them is almost identical to the example above which uses code generation. The main difference is that we use generic instead of specific readers and writers. + +First we'll serialize our users to a data file on disk. + +```java +// Serialize user1 and user2 to disk +File file = new File("users.avro"); +DatumWriter datumWriter = new GenericDatumWriter(schema); +DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); +dataFileWriter.create(schema, file); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. Since we are not using code generation, we create a GenericDatumWriter. It requires the schema both to determine how to write the GenericRecords and to verify that all non-nullable fields are present. + +As in the code generation example, we also create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, we'll deserialize the data file we just created. + +```java +// Deserialize users from disk +DatumReader datumReader = new GenericDatumReader(schema); +DataFileReader dataFileReader = new DataFileReader(file, datumReader); +GenericRecord user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +``` + +This outputs: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +``` + +Deserializing is very similar to serializing. We create a GenericDatumReader, analogous to the GenericDatumWriter we used in serialization, which converts in-memory serialized items into GenericRecords. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file, and the reader's schema provided to the GenericDatumReader. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next, we use the DataFileReader to iterate through the serialized users and print the deserialized object to stdout. Note how we perform the iteration: we create a single GenericRecord object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same record object rather than allocating a new GenericRecord for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (GenericRecord user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile +$ mvn -q exec:java -Dexec.mainClass=example.GenericMain +``` diff --git a/doc/content/en/docs/1.11.4/Getting started (Python)/_index.md b/doc/content/en/docs/1.11.4/Getting started (Python)/_index.md new file mode 100644 index 00000000000..d4e40a6146d --- /dev/null +++ b/doc/content/en/docs/1.11.4/Getting started (Python)/_index.md @@ -0,0 +1,147 @@ +--- +categories: [] +tags: ["python"] +title: "Getting Started (Python)" +linkTitle: "Getting Started (Python)" +weight: 3 +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Python. This guide only covers using Avro for data serialization; see Patrick Hunt's Avro RPC Quick Start for a good introduction to using Avro for RPC. + +## Notice for Python 3 users +A package called "avro-python3" had been provided to support Python 3 previously, but the codebase was consolidated into the "avro" package and that supports both Python 2 and 3 now. The avro-python3 package will be removed in the near future, so users should use the "avro" package instead. They are mostly API compatible, but there's a few minor difference (e.g., function name capitalization, such as avro.schema.Parse vs avro.schema.parse). + +## Download +For Python, the easiest way to get started is to install it from PyPI. Python's Avro API is available over PyPi. + +```shell +$ python3 -m pip install avro +``` + +The official releases of the Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the Apache Avroâ„ĸ Releases page. This guide uses Avro 1.11.4, the latest version at the time of writing. Download and unzip avro-1.11.4.tar.gz, and install via python setup.py (this will probably require root privileges). Ensure that you can import avro from a Python prompt. + +```shell +$ tar xvf avro-1.11.4.tar.gz +$ cd avro-1.11.4 +$ python setup.py install +$ python +>>> import avro # should not raise ImportError +``` + +Alternatively, you may build the Avro Python library from source. From your the root Avro directory, run the commands + +```shell +$ cd lang/py/ +$ python3 -m pip install -e . +$ python +``` + +## Defining a schema +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item, regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. Note that the Avro Python library does not support code generation. + +Try running the following code snippet, which serializes two users to a data file on disk, and then reads back and deserializes the data file: + +```python +import avro.schema +from avro.datafile import DataFileReader, DataFileWriter +from avro.io import DatumReader, DatumWriter + +schema = avro.schema.parse(open("user.avsc", "rb").read()) + +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +writer.close() + +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +for user in reader: + print(user) +reader.close() +``` + +This outputs: + +```json +{u'favorite_color': None, u'favorite_number': 256, u'name': u'Alyssa'} +{u'favorite_color': u'red', u'favorite_number': 7, u'name': u'Ben'} +``` + +Do make sure that you open your files in binary mode (i.e. using the modes wb or rb respectively). Otherwise you might generate corrupt files due to automatic replacement of newline characters with the platform-specific representations. + +Let's take a closer look at what's going on here. + +```python +schema = avro.schema.parse(open("user.avsc", "rb").read()) +``` + +avro.schema.parse takes a string containing a JSON schema definition as input and outputs a avro.schema.Schema object (specifically a subclass of Schema, in this case RecordSchema). We're passing in the contents of our user.avsc schema file here. + +```python +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +``` + +We create a DataFileWriter, which we'll use to write serialized items to a data file on disk. The DataFileWriter constructor takes three arguments: + +* The file we'll serialize to +* A DatumWriter, which is responsible for actually serializing the items to Avro's binary format (DatumWriters can be used separately from DataFileWriters, e.g., to perform IPC with Avro). +* The schema we're using. The DataFileWriter needs the schema both to write the schema to the data file, and to verify that the items we write are valid items and write the appropriate fields. + +```python +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +``` + +We use DataFileWriter.append to add items to our data file. Avro records are represented as Python dicts. Since the field favorite_color has type ["string", "null"], we are not required to specify this field, as shown in the first append. Were we to omit the required name field, an exception would be raised. Any extra entries not corresponding to a field are present in the dict are ignored. + +```python +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +``` + +We open the file again, this time for reading back from disk. We use a DataFileReader and DatumReader analagous to the DataFileWriter and DatumWriter above. + +```python +for user in reader: + print(user) +``` + +The DataFileReader is an iterator that returns dicts corresponding to the serialized items. diff --git a/doc/content/en/docs/1.11.4/IDL Language/_index.md b/doc/content/en/docs/1.11.4/IDL Language/_index.md new file mode 100644 index 00000000000..f50b0a489be --- /dev/null +++ b/doc/content/en/docs/1.11.4/IDL Language/_index.md @@ -0,0 +1,435 @@ +--- +title: "IDL Language" +linkTitle: "IDL Language" +weight: 201 +--- + + + +## Introduction +This document defines Avro IDL, a higher-level language for authoring Avro schemata. Before reading this document, you should have familiarity with the concepts of schemata and protocols, as well as the various primitive and complex types available in Avro. + +## Overview + +### Purpose +The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python. Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. + +### Usage +Each Avro IDL file defines a single Avro Protocol, and thus generates as its output a JSON-format Avro Protocol file with extension .avpr. + +To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr +$ head /tmp/namespaces.avpr +{ + "protocol" : "TestNamespace", + "namespace" : "avro.test.protocol", +``` +The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information. + +A Maven plugin is also provided to compile .avdl files. To use it, add something like the following to your pom.xml: +```xml + + + + org.apache.avro + avro-maven-plugin + + + + idl-protocol + + + + + + +``` + +## Defining a Protocol in Avro IDL +An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined by the following code: +```java +protocol MyProtocol { +} +``` +This is equivalent to (and generates) the following JSON protocol definition: +```json +{ +"protocol" : "MyProtocol", + "types" : [ ], + "messages" : { + } +} +``` +The namespace of the protocol may be changed using the @namespace annotation: +```java +@namespace("mynamespace") +protocol MyProtocol { +} +``` +This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, as will be described later in this document. + +Protocols in Avro IDL can contain the following items: + +* Imports of external protocol and schema files. +* Definitions of named schemata, including records, errors, enums, and fixeds. +* Definitions of RPC messages + +## Imports +Files may be imported in one of three formats: + +* An IDL file may be imported with a statement like: + + `import idl "foo.avdl";` + +* A JSON protocol file may be imported with a statement like: + + `import protocol "foo.avpr";` + +* A JSON schema file may be imported with a statement like: + + `import schema "foo.avsc";` + +Messages and types in the imported file are added to this file's protocol. + +Imported file names are resolved relative to the current IDL file. + +## Defining an Enumeration +Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. This default is only used when an incompatible symbol is read. It is not used if the enum field is missing. + +Example Writer Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE, OVAL +} +``` +Example Reader Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE +} = CIRCLE; +``` +In the above example, the reader will use the default value of `CIRCLE` whenever reading data written with the `OVAL` symbol of the writer. Also note that, unlike the JSON format, anonymous enums cannot be defined. + +## Defining a Fixed Length Field +Fixed fields are defined using the following syntax: +``` +fixed MD5(16); +``` +This example defines a fixed-length type called MD5 which contains 16 bytes. + +## Defining Records and Errors +Records are defined in Avro IDL using a syntax similar to a struct definition in C: +```java +record Employee { + string name; + boolean active = true; + long salary; +} +``` +The above example defines a record with the name “Employee” with three fields. + +To define an error, simply use the keyword _error_ instead of _record_. For example: +```java +error Kaboom { + string explanation; + int result_code = -1; +} +``` +Each field in a record or error consists of a type and a name, optional property annotations and an optional default value. + +A type reference in Avro IDL must be one of: + +* A primitive type +* A logical type +* A named schema defined prior to this usage in the same Protocol +* A complex type (array, map, or union) + +### Primitive Types +The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_. + +### Logical Types +Some of the logical types supported by Avro's JSON format are also supported by Avro IDL. The currently supported types are: + +* _decimal_ (logical type [decimal]({{< relref "../specification#decimal" >}})) +* _date_ (logical type [date]({{< relref "../specification#date" >}})) +* _time_ms_ (logical type [time-millis]({{< relref "../specification#time-millisecond-precision" >}})) +* _timestamp_ms_ (logical type [timestamp-millis]({{< relref "../specification#timestamp-millisecond-precision" >}})) +* _uuid_ (logical type [uuid]({{< relref "../specification#uuid" >}})) + +For example: +```java +record Job { + string jobid; + date submitDate; + time_ms submitTime; + timestamp_ms finishTime; + decimal(9,2) finishRatio; + uuid pk = "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"; +} +``` + +Logical types can also be specified via an annotation, which is useful for logical types for which a keyword does not exist: + +```java +record Job { + string jobid; + @logicalType("timestamp-micros") + long finishTime; +} +``` + +### References to Named Schemata +If a named schema has already been defined in the same Avro IDL file, it may be referenced by name as if it were a primitive type: +```java +record Card { + Suit suit; // refers to the enum Card defined above + int number; +} +``` + +### Default Values +Default values for fields may be optionally specified by using an equals sign after the field name followed by a JSON expression indicating the default value. This JSON is interpreted as described in the [spec]({{< relref "../specification#schema-record" >}}). + +### Complex Types + +#### Arrays +Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of any type t is denoted `array`. For example, an array of strings is denoted `array`, and a multidimensional array of Foo records would be `array>`. + +#### Maps +Map types are written similarly to array types. An array that contains values of type t is written `map`. As in the JSON schema format, all maps contain `string`-type keys. + +#### Unions +Union types are denoted as `union { typeA, typeB, typeC, ... }`. For example, this record contains a string field that is optional (unioned with null), and a field containing either a precise or a imprecise number: +```java +record RecordWithUnion { + union { null, string } optionalString; + union { decimal(12, 6), float } number; +} +``` +Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a record may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. + +Because it occurs so often, there is a special shorthand to denote a union of `null` with another type. In the following snippet, the first three fields have identical types: + +```java +record RecordWithUnion { + union { null, string } optionalString1 = null; + string? optionalString2 = null; + string? optionalString3; // No default value + string? optionalString4 = "something"; +} +``` + +Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So in the example above, all fields are valid. + +## Defining RPC Messages +The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message add which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: +```java +int add(int foo, int bar = 0); +``` +Message arguments, like record fields, may specify default values. + +To define a message with no response, you may use the alias _void_, equivalent to the Avro _null_ type: +```java +void logMessage(string message); +``` +If you have previously defined an error type within the same protocol, you may declare that a message can throw this error using the syntax: +```java +void goKaboom() throws Kaboom; +``` +To define a one-way message, use the keyword `oneway` after the parameter list, for example: +```java +void fireAndForget(string message) oneway; +``` + +## Other Language Features + +### Comments +All Java-style comments are supported within a Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines. + +Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment. + +### Escaping Identifiers +Occasionally, one will need to use a reserved language keyword as an identifier. In order to do so, backticks (`) may be used to escape the identifier. For example, to define a message with the literal name error, you may write: +```java +void `error`(); +``` +This syntax is allowed anywhere an identifier is expected. + +### Annotations for Ordering and Namespaces +Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. + +For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows: +```java +record MyRecord { + string @order("ascending") myAscendingSortField; + string @order("descending") myDescendingField; + string @order("ignore") myIgnoredField; +} +``` +A field's type (with the exception of type references) may also be preceded by annotations, e.g.: +```java +record MyRecord { + @java-class("java.util.ArrayList") array myStrings; +} +``` +This can be used to support java classes that can be serialized/deserialized via their `toString`/`String constructor`, e.g.: +```java +record MyRecord { + @java-class("java.math.BigDecimal") string value; + @java-key-class("java.io.File") map fileStates; + array<@java-class("java.math.BigDecimal") string> weights; +} +``` +Similarly, a `@namespace` annotation may be used to modify the namespace when defining a named schema. For example: +```java +@namespace("org.apache.avro.firstNamespace") +protocol MyProto { + @namespace("org.apache.avro.someOtherNamespace") + record Foo {} + + record Bar {} +} +``` +will define a protocol in the _firstNamespace_ namespace. The record _Foo_ will be defined in _someOtherNamespace_ and _Bar_ will be defined in _firstNamespace_ as it inherits its default from its container. + +Type and field aliases are specified with the `@aliases` annotation as follows: +```java +@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"]) +record MyRecord { + string @aliases(["oldField", "ancientField"]) myNewField; +} +``` +Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. + +## Complete Example +The following is an example of an Avro IDL file that shows most of the above features: +```java +/* +* Header with license information. +*/ + +/** + * An example protocol in Avro IDL + */ +@namespace("org.apache.avro.test") +protocol Simple { + /** Documentation for the enum type Kind */ + @aliases(["org.foo.KindOf"]) + enum Kind { + FOO, + BAR, // the bar enum value + BAZ + } = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. + + /** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ + fixed MD5(16); + + record TestRecord { + /** Record name; has no intrinsic order */ + string @order("ignore") name; + + Kind @order("descending") kind; + + MD5 hash; + + /* + Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. + */ + union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + + array arrayOfLongs; + } + + /** Errors are records that can be thrown from a method */ + error TestError { + string message; + } + + string hello(string greeting); + /** Return what was given. Demonstrates the use of backticks to name types/fields/messages/parameters after keywords */ + TestRecord echo(TestRecord `record`); + int add(int arg1, int arg2); + bytes echoBytes(bytes data); + void `error`() throws TestError; + // The oneway keyword forces the method to return null. + void ping() oneway; +} +``` +Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory. + +## IDE support + +There are several editors and IDEs that support Avro IDL files, usually via plugins. + +### JetBrains + +Apache Avro IDL Schema Support 203.1.2 was released in 9 December 2021. + +Features: +* Syntax Highlighting +* Code Completion +* Code Formatting +* Error Highlighting +* Inspections & quick fixes +* JSON schemas for .avpr and .avsc files + +It's available via the [JetBrains Marketplace](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support) +and on [GitHub](https://github.com/opwvhk/avro-schema-support). + +The plugin supports almost the all JetBrains products: IntelliJ IDEA, PyCharm, WebStorm, Android Studio, AppCode, GoLand, Rider, CLion, RubyMine, PhpStorm, DataGrip, DataSpell, MPS, Code With Me Guest and JetBrains Client. + +Only JetBrains Gateway does not support this plugin directly. But the backend (JetBrains) IDE that it connects to does. + +### Eclipse + +Avroclipse 0.0.11 was released on 4 December 2019. + +Features: +* Syntax Highlighting +* Error Highlighting +* Code Completion + +It is available on the [Eclipse Marketplace](https://marketplace.eclipse.org/content/avroclipse) +and [GitHub](https://github.com/dvdkruk/avroclipse). + +### Visual Studio Code + +avro-idl 0.5.0 was released on 16 June 2021. It provides syntax highlighting. + +It is available on the [VisualStudio Marketplace](https://marketplace.visualstudio.com/items?itemName=streetsidesoftware.avro) +and [GitHub](https://github.com/Jason3S/vscode-avro-ext) + +### Atom.io + +atom-language-avro 0.0.13 was released on 14 August 2015. It provides syntax highlighting. + +It is available as [Atom.io package](https://atom.io/packages/atom-language-avro) +and [GitHub](https://github.com/jonesetc/atom-language-avro) + +### Vim + +A `.avdl` detecting plugin by Gurpreet Atwal on [GitHub](https://github.com/gurpreetatwal/vim-avro) (Last change in December 2016) + +[avro-idl.vim](https://github.com/apache/avro/blob/master/share/editors/avro-idl.vim) in the Avro repository `share/editors` directory (last change in September 2010) + +Both provide syntax highlighting. diff --git a/doc/content/en/docs/1.11.4/MapReduce guide/_index.md b/doc/content/en/docs/1.11.4/MapReduce guide/_index.md new file mode 100644 index 00000000000..2540ff82204 --- /dev/null +++ b/doc/content/en/docs/1.11.4/MapReduce guide/_index.md @@ -0,0 +1,396 @@ +--- +title: "MapReduce guide" +linkTitle: "MapReduce guide" +weight: 200 +--- + + + +Avro provides a convenient way to represent complex data structures within a Hadoop MapReduce job. Avro data can be used as both input to and output from a MapReduce job, as well as the intermediate format. The example in this guide uses Avro data for all three, but it's possible to mix and match; for instance, MapReduce can be used to aggregate a particular field in an Avro record. + +This guide assumes basic familiarity with both Hadoop MapReduce and Avro. See the [Hadoop documentation](https://hadoop.apache.org/docs/current/) and the [Avro getting started guide](./getting-started-java/) for introductions to these projects. This guide uses the old MapReduce API (`org.apache.hadoop.mapred`) and the new MapReduce API (`org.apache.hadoop.mapreduce`). + +## Setup +The code from this guide is included in the Avro docs under examples/mr-example. The example is set up as a Maven project that includes the necessary Avro and MapReduce dependencies and the Avro Maven plugin for code generation, so no external jars are needed to run the example. In particular, the POM includes the following dependencies: +```xml + + org.apache.avro + avro + 1.11.4 + + + org.apache.avro + avro-mapred + 1.11.4 + + + org.apache.hadoop + hadoop-client + 3.1.2 + +``` +And the following plugin: +```xml + + org.apache.avro + avro-maven-plugin + 1.11.4 + + + generate-sources + + schema + + + ${project.basedir}/../ + ${project.basedir}/target/generated-sources/ + + + + +``` + +If you do not configure the *sourceDirectory* and *outputDirectory* properties, the defaults will be used. The *sourceDirectory* property defaults to *src/main/avro*. The *outputDirectory* property defaults to *target/generated-sources*. You can change the paths to match your project layout. + +Alternatively, Avro jars can be downloaded directly from the Apache Avroâ„ĸ Releases [page](https://avro.apache.org/releases.html). The relevant Avro jars for this guide are *avro-1.11.4.jar* and *avro-mapred-1.11.4.jar*, as well as *avro-tools-1.11.4.jar* for code generation and viewing Avro data files as JSON. In addition, you will need to install Hadoop in order to use MapReduce. + +## Example: ColorCount +Below is a simple example of a MapReduce that uses Avro. There is an example for both the old (org.apache.hadoop.mapred) and new (org.apache.hadoop.mapreduce) APIs under *examples/mr-example/src/main/java/example/*. _MapredColorCount_ is the example for the older mapred API while _MapReduceColorCount_ is the example for the newer mapreduce API. Both examples are below, but we will detail the mapred API in our subsequent examples. + +MapredColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.*; +import org.apache.avro.Schema.Type; +import org.apache.avro.mapred.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.*; + +import example.avro.User; + +public class MapredColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } + } + + public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapredColorCount "); + return -1; + } + + JobConf conf = new JobConf(getConf(), MapredColorCount.class); + conf.setJobName("colorcount"); + + FileInputFormat.setInputPaths(conf, new Path(args[0])); + FileOutputFormat.setOutputPath(conf, new Path(args[1])); + + AvroJob.setMapperClass(conf, ColorCountMapper.class); + AvroJob.setReducerClass(conf, ColorCountReducer.class); + + // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set + // relevant config options such as input/output format, map output + // classes, and output key class. + AvroJob.setInputSchema(conf, User.getClassSchema()); + AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); + + JobClient.runJob(conf); + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args); + System.exit(res); + } +} +``` + +MapReduceColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.avro.mapreduce.AvroJob; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.avro.mapreduce.AvroKeyValueOutputFormat; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import example.avro.User; + +public class MapReduceColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } + + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapReduceColorCount "); + return -1; + } + + Job job = new Job(getConf()); + job.setJarByClass(MapReduceColorCount.class); + job.setJobName("Color Count"); + + FileInputFormat.setInputPaths(job, new Path(args[0])); + FileOutputFormat.setOutputPath(job, new Path(args[1])); + + job.setInputFormatClass(AvroKeyInputFormat.class); + job.setMapperClass(ColorCountMapper.class); + AvroJob.setInputKeySchema(job, User.getClassSchema()); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + job.setOutputFormatClass(AvroKeyValueOutputFormat.class); + job.setReducerClass(ColorCountReducer.class); + AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); + AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); + + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new MapReduceColorCount(), args); + System.exit(res); + } +} +``` +ColorCount reads in data files containing *User* records, defined in _examples/user.avsc_, and counts the number of instances of each favorite color. (This example draws inspiration from the canonical _WordCount_ MapReduce application.) This example uses the old MapReduce API. See MapReduceAvroWordCount, found under _doc/examples/mr-example/src/main/java/example/_ to see the new MapReduce API example. The User schema is defined as follows: +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` +This schema is compiled into the *User* class used by *ColorCount* via the Avro Maven plugin (see _examples/mr-example/pom.xml_ for how this is set up). + +*ColorCountMapper* essentially takes a *User* as input and extracts the User's favorite color, emitting the key-value pair ``. _ColorCountReducer_ then adds up how many occurrences of a particular favorite color were emitted, and outputs the result as a Pair record. These Pairs are serialized to an Avro data file. + +## Running ColorCount +The _ColorCount_ application is provided as a Maven project in the Avro docs under _examples/mr-example_. To build the project, including the code generation of the User schema, run: +```shell +mvn compile +``` +Next, run _GenerateData_ from `examples/mr-examples` to create an Avro data file, `input/users.avro`, containing 20 Users with favorite colors chosen randomly from a list: +```shell +mvn exec:java -q -Dexec.mainClass=example.GenerateData +``` +Besides creating the data file, GenerateData prints the JSON representations of the Users generated to stdout, for example: +```json +{"name": "user", "favorite_number": null, "favorite_color": "red"} +{"name": "user", "favorite_number": null, "favorite_color": "green"} +{"name": "user", "favorite_number": null, "favorite_color": "purple"} +{"name": "user", "favorite_number": null, "favorite_color": null} +... +``` +Now we're ready to run ColorCount. We specify our freshly-generated input folder as the input path and output as our output folder (note that MapReduce will not start a job if the output folder already exists): +```shell +mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output" +``` +Once ColorCount completes, checking the contents of the new output directory should yield the following: +```shell +$ ls output/ +part-00000.avro _SUCCESS +``` +You can check the contents of the generated Avro file using the avro-tools jar: +```shell +$ java -jar /path/to/avro-tools-1.11.4.jar tojson output/part-00000.avro +{"value": 3, "key": "blue"} +{"value": 7, "key": "green"} +{"value": 1, "key": "none"} +{"value": 2, "key": "orange"} +{"value": 3, "key": "purple"} +{"value": 2, "key": "red"} +{"value": 2, "key": "yellow"} +``` +Now let's go over the ColorCount example in detail. + +## AvroMapper - org.apache.hadoop.mapred API + +The easiest way to use Avro data files as input to a MapReduce job is to subclass `AvroMapper`. An `AvroMapper` defines a `map` function that takes an Avro datum as input and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountMapper is an AvroMapper that takes a User as input and outputs a `Pair>`, where the CharSequence key is the user's favorite color and the Integer value is 1. +```java +public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } +} +``` +In order to use our AvroMapper, we must call AvroJob.setMapperClass and AvroJob.setInputSchema. +```java +AvroJob.setMapperClass(conf, ColorCountMapper.class); +AvroJob.setInputSchema(conf, User.getClassSchema()); +``` +Note that `AvroMapper` does not implement the `Mapper` interface. Under the hood, the specified Avro data files are deserialized into AvroWrappers containing the actual data, which are processed by a Mapper that calls the configured AvroMapper's map function. AvroJob.setInputSchema sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setMapperClass`, `JobConf.setInputFormat`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Mapper - org.apache.hadoop.mapreduce API +This document will not go into all the differences between the mapred and mapreduce APIs, however will describe the main differences. As you can see, ColorCountMapper is now a subclass of the Hadoop Mapper class and is passed an AvroKey as it's key. Additionally, the AvroJob method calls were slightly changed. +```java + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } +``` + +## AvroReducer - org.apache.hadoop.mapred API +Analogously to AvroMapper, an AvroReducer defines a reducer function that takes the key/value types output by an AvroMapper (or any mapper that outputs Pairs) and outputs a key/value pair represented a Pair record. In the ColorCount example, ColorCountReducer is an AvroReducer that takes the CharSequence key representing a favorite color and the `Iterable` representing the counts for that color (they should all be 1 in this example) and adds up the counts. +```java +public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } +} +``` +In order to use our AvroReducer, we must call AvroJob.setReducerClass and AvroJob.setOutputSchema. +```java +AvroJob.setReducerClass(conf, ColorCountReducer.class); +AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); +``` +Note that _AvroReducer_ does not implement the _Reducer_ interface. The intermediate Pairs output by the mapper are split into _AvroKeys_ and _AvroValues_, which are processed by a Reducer that calls the configured AvroReducer's `reduce` function. `AvroJob.setOutputSchema` sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setReducerClass`, `JobConf.setOutputFormat`, `JobConf.setOutputKeyClass`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Reduce - org.apache.hadoop.mapreduce API +As before we not detail every difference between the APIs. As with the _Mapper_ change _ColorCountReducer_ is now a subclass of _Reducer_ and _AvroKey_ and _AvroValue_ are emitted. Additionally, the _AvroJob_ method calls were slightly changed. +```java + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } +``` + +## Learning more +The mapred API allows users to mix Avro AvroMappers and AvroReducers with non-Avro Mappers and Reducers and the mapreduce API allows users input Avro and output non-Avro or vice versa. + +The mapred package has API org.apache.avro.mapred documentation as does the `org.apache.avro.mapreduce` package. MapReduce API (`org.apache.hadoop.mapreduce`). Similarily to the mapreduce package, it's possible with the mapred API to implement your own Mappers and Reducers directly using the public classes provided in these libraries. See the `AvroWordCount` application, found under _examples/mr-example/src/main/java/example/AvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the old MapReduce API. See the `MapReduceAvroWordCount` application, found under _examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the new MapReduce API. diff --git a/doc/content/en/docs/1.11.4/SASL profile/_index.md b/doc/content/en/docs/1.11.4/SASL profile/_index.md new file mode 100644 index 00000000000..67c316e221c --- /dev/null +++ b/doc/content/en/docs/1.11.4/SASL profile/_index.md @@ -0,0 +1,93 @@ +--- +title: "SASL profile" +linkTitle: "SASL profile" +weight: 202 +--- + + + +## Introduction +SASL ([RFC 2222](https://www.ietf.org/rfc/rfc2222.txt)) provides a framework for authentication and security of network protocols. Each protocol that uses SASL is meant to define a SASL profile. This document provides a SASL profile for connection-based Avro RPC. + +## Overview +SASL negotiation proceeds as a series of message interactions over a connection between a client and server using a selected SASL mechanism. The client starts this negotiation by sending its chosen mechanism name with an initial (possibly empty) message. Negotiation proceeds with the exchange of messages until either side indicates success or failure. The content of the messages is mechanism-specific. If the negotiation succeeds, then the session can proceed over the connection, otherwise it must be abandoned. + +Some mechanisms continue to process session data after negotiation (e.g., encrypting it), while some specify that further session data is transmitted unmodifed. + +## Negotiation + +### Commands +Avro SASL negotiation uses four one-byte commands. + +* 0: START Used in a client's initial message. +* 1: CONTINUE Used while negotiation is ongoing. +* 2: FAIL Terminates negotiation unsuccessfully. +* 3: COMPLETE Terminates negotiation sucessfully. + +The format of a START message is: + +`| 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |` + +The format of a CONTINUE message is: + +`| 1 | 4-byte payload length | payload data |` + +The format of a FAIL message is: + +`| 2 | 4-byte message length | UTF-8 message |` + +The format of a COMPLETE message is: + +`| 3 | 4-byte payload length | payload data |` + +### Process +Negotiation is initiated by a client sending a START command containing the client's chosen mechanism name and any mechanism-specific payload data. + +The server and client then interchange some number (possibly zero) of CONTINUE messages. Each message contains payload data that is processed by the security mechanism to generate the next message. + +Once either the client or server send a FAIL message then negotiation has failed. UTF-8-encoded text is included in the failure message. Once either a FAIL message has been sent or received, or any other error occurs in the negotiation, further communication on this connection must cease. + +Once either the client or server send a COMPLETE message then negotiation has completed successfully. Session data may now be transmitted over the connection until it is closed by either side. + +## Session Data +If no SASL QOP (quality of protection) is negotiated, then all subsequent writes to/reads over this connection are written/read unmodified. In particular, messages use Avro [framing](#Message+Framing), and are of the form: + +`| 4-byte frame length | frame data | ... | 4 zero bytes |` + +If a SASL QOP is negotiated, then it must be used by the connection for all subsequent messages. This is done by wrapping each non-empty frame written using the security mechanism and unwrapping each non-empty frame read. The length written in each non-empty frame is the length of the wrapped data. Complete frames must be passed to the security mechanism for unwrapping. Unwrapped data is then passed to the application as the content of the frame. + +If at any point processing fails due to wrapping, unwrapping or framing errors, then all further communication on this connection must cease. + +## Anonymous Mechanism +The SASL anonymous mechanism ([RFC 2245](https://www.ietf.org/rfc/rfc2222.txt)) is quite simple to implement. In particular, an initial anonymous request may be prefixed by the following static sequence: + +`| 0 | 0009 | ANONYMOUS | 0000 |` + +If a server uses the anonymous mechanism, it should check that the mechanism name in the start message prefixing the first request received is 'ANONYMOUS', then simply prefix its initial response with a COMPLETE message of: + +`| 3 | 0000 |` + +If an anonymous server recieves some other mechanism name, then it may respond with a FAIL message as simple as: + +`| 2 | 0000 |` + +Note that the anonymous mechanism need add no additional round-trip messages between client and server. The START message can be piggybacked on the initial request and the COMPLETE or FAIL message can be piggybacked on the initial response. diff --git a/doc/content/en/docs/1.11.4/Specification/_index.md b/doc/content/en/docs/1.11.4/Specification/_index.md new file mode 100755 index 00000000000..7cc5a17547e --- /dev/null +++ b/doc/content/en/docs/1.11.4/Specification/_index.md @@ -0,0 +1,848 @@ +--- +title: "Specification" +linkTitle: "Specification" +weight: 4 +date: 2021-10-25 +aliases: +- spec.html +--- + + + +## Introduction +This document defines Apache Avro. It is intended to be the authoritative specification. Implementations of Avro must adhere to this document. + +## Schema Declaration {#schema-declaration} +A Schema is represented in [JSON](https://www.json.org/) by one of: + +* A JSON string, naming a defined type. +* A JSON object, of the form: +```js +{"type": "typeName", ...attributes...} +``` +where _typeName_ is either a primitive or derived type name, as defined below. Attributes not defined in this document are permitted as metadata, but must not affect the format of serialized data. +* A JSON array, representing a union of embedded types. + +## Primitive Types +The set of primitive type names is: + +* _null_: no value +* _boolean_: a binary value +* _int_: 32-bit signed integer +* _long_: 64-bit signed integer +* _float_: single precision (32-bit) IEEE 754 floating-point number +* _double_: double precision (64-bit) IEEE 754 floating-point number +* _bytes_: sequence of 8-bit unsigned bytes +* _string_: unicode character sequence + +Primitive types have no specified attributes. + +Primitive type names are also defined type names. Thus, for example, the schema "string" is equivalent to: +```json +{"type": "string"} +``` + +## Complex Types +Avro supports six kinds of complex types: _records_, _enums_, _arrays_, _maps_, _unions_ and _fixed_. + +### Records {#schema-record} +Records use the type name "record" and support the following attributes: + +* _name_: a JSON string providing the name of the record (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _aliases_: a JSON array of strings, providing alternate names for this record (optional). +* _fields_: a JSON array, listing fields (required). Each field is a JSON object with the following attributes: + * _name_: a JSON string providing the name of the field (required), and + * _doc_: a JSON string describing this field for users (optional). + * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above + * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. + * _aliases_: a JSON array of strings, providing alternate names for this field (optional). + * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. + +*field default values* + +| **avro type** | **json type** | **example** | +|---------------|----------------|-------------| +| null | null | `null` | +| boolean | boolean | `true` | +| int,long | integer | `1` | +| float,double | number | `1.1` | +| bytes | string | `"\u00FF"` | +| string | string | `"foo"` | +| record | object | `{"a": 1}` | +| enum | string | `"FOO"` | +| array | array | `[1]` | +| map | object | `{"a": 1}` | +| fixed | string | `"\u00ff"` | + +For example, a linked-list of 64-bit values may be defined with: +```jsonc +{ + "type": "record", + "name": "LongList", + "aliases": ["LinkedLongs"], // old name for this + "fields" : [ + {"name": "value", "type": "long"}, // each element has a long + {"name": "next", "type": ["null", "LongList"]} // optional next element + ] +} +``` + +### Enums +Enums use the type name "enum" and support the following attributes: + +* _name_: a JSON string providing the name of the enum (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _symbols_: a JSON array, listing symbols, as JSON strings (required). All symbols in an enum must be unique; duplicates are prohibited. Every symbol must match the regular expression [A-Za-z_][A-Za-z0-9_]* (the same requirement as for [names]({{< ref "#names" >}} "Names")). +* _default_: A default value for this enumeration, used during resolution when the reader encounters a symbol from the writer that isn't defined in the reader's schema (optional). The value provided here must be a JSON string that's a member of the symbols array. See documentation on schema resolution for how this gets used. + +For example, playing card suits might be defined with: +```json +{ + "type": "enum", + "name": "Suit", + "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"] +} +``` + +### Arrays +Arrays use the type name "array" and support a single attribute: + +* _items_: the schema of the array's items. + +For example, an array of strings is declared with: +```json +{ + "type": "array", + "items" : "string", + "default": [] +} +``` + +### Maps +Maps use the type name "map" and support one attribute: + +* _values_: the schema of the map's values. + +Map keys are assumed to be strings. + +For example, a map from string to long is declared with: +```json +{ + "type": "map", + "values" : "long", + "default": {} +} +``` + +### Unions +Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string. + +(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match the first element of the union. Thus, for unions containing "null", the "null" is usually listed first, since the default value of such unions is typically null.) + +Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.) + +Unions may not immediately contain other unions. + +### Fixed +Fixed uses the type name "fixed" and supports the following attributes: + +* _name_: a string naming this fixed (required). +* _namespace_, a string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _size_: an integer, specifying the number of bytes per value (required). + +For example, 16-byte quantity may be declared with: +```json +{"type": "fixed", "size": 16, "name": "md5"} +``` + +### Names {#names} +Record, enums and fixed are named types. Each has a fullname that is composed of two parts; a name and a namespace, separated by a dot. Equality of names is defined on the fullname. + +Record fields and enum symbols have names as well (but no namespace). Equality of fields and enum symbols is defined on the name of the field/symbol within its scope (the record/enum that defines it). Fields and enum symbols across scopes are never equal. + +The name portion of the fullname of named types, record field names, and enum symbols must: + +* start with [A-Za-z_] +* subsequently contain only [A-Za-z0-9_] + +A namespace is a dot-separated sequence of such names. The empty string may also be used as a namespace to indicate the null namespace. Equality of names (including field names and enum symbols) as well as fullnames is case-sensitive. + +The null namespace may not be used in a dot-separated sequence of names. So the grammar for a namespace is: +``` + | [()*] +``` + +In record, enum and fixed definitions, the fullname is determined according to the algorithm below the example: + +``` +{ + "type": "record", + "name": "Example", + "doc": "A simple name (attribute) and no namespace attribute: use the null namespace (\"\"); the fullname is 'Example'.", + "fields": [ + { + "name": "inheritNull", + "type": { + "type": "enum", + "name": "Simple", + "doc": "A simple name (attribute) and no namespace attribute: inherit the null namespace of the enclosing type 'Example'. The fullname is 'Simple'.", + "symbols": ["a", "b"] + } + }, { + "name": "explicitNamespace", + "type": { + "type": "fixed", + "name": "Simple", + "namespace": "explicit", + "doc": "A simple name (attribute) and a namespace (attribute); the fullname is 'explicit.Simple' (this is a different type than of the 'inheritNull' field).", + "size": 12 + } + }, { + "name": "fullName", + "type": { + "type": "record", + "name": "a.full.Name", + "namespace": "ignored", + "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", + "fields": [ + { + "name": "inheritNamespace", + "type": { + "type": "enum", + "name": "Understanding", + "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.", + "symbols": ["d", "e"] + } + } + ] + } + } + ] +} +``` + +The fullname of a record, enum or fixed definition is determined by the required `name` and optional `namespace` attributes like this: + +* A fullname is specified. If the name specified contains a dot, then it is assumed to be a fullname, and any namespace also specified is ignored. For example, use "name": "org.foo.X" to indicate the fullname org.foo.X. +* A simple name (a name that contains no dots) and namespace are both specified. For example, one might use "name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X. +* A simple name only is specified (a name that contains no dots). In this case the namespace is taken from the most tightly enclosing named schema or protocol, and the fullname is constructed from that namespace and the name. For example, if "name": "X" is specified, and this occurs within a field of the record definition of org.foo.Y, then the fullname is org.foo.X. This also happens if there is no enclosing namespace (i.e., the enclosing schema definition has the null namespace). + +References to previously defined names are as in the latter two cases above: if they contain a dot they are a fullname, if they do not contain a dot, the namespace is the namespace of the enclosing definition. + +Primitive type names (`null`, `boolean`, `int`, `long`, `float`, `double`, `bytes`, `string`) have no namespace and their names may not be defined in any namespace. + +Complex types (`record`, `enum`, `array`, `map`, `fixed`) have no namespace, but their names (as well as `union`) are permitted to be reused as type names. This can be confusing to the human reader, but is always unambiguous for binary serialization. Due to the limitations of JSON encoding, it is a best practice to use a namespace when using these names. + +A schema or protocol may not contain multiple definitions of a fullname. Further, a name must be defined before it is used ("before" in the depth-first, left-to-right traversal of the JSON parse tree, where the types attribute of a protocol is always deemed to come "before" the messages attribute.) + +### Aliases +Named types and fields may have aliases. An implementation may optionally use aliases to map a writer's schema to the reader's. This facilitates both schema evolution as well as processing disparate datasets. + +Aliases function by re-writing the writer's schema using aliases from the reader's schema. For example, if the writer's schema was named "Foo" and the reader's schema is named "Bar" and has an alias of "Foo", then the implementation would act as though "Foo" were named "Bar" when reading. Similarly, if data was written as a record with a field named "x" and is read as a record with a field named "y" with alias "x", then the implementation would act as though "x" were named "y" when reading. + +A type alias may be specified either as a fully namespace-qualified, or relative to the namespace of the name it is an alias for. For example, if a type named "a.b" has aliases of "c" and "x.y", then the fully qualified names of its aliases are "a.c" and "x.y". + +## Data Serialization and Deserialization +Binary encoded Avro data does not include type information or field names. The benefit is that the serialized data is small, but as a result a schema must always be used in order to read Avro data correctly. The best way to ensure that the schema is structurally identical to the one used to write the data is to use the exact same schema. + +Therefore, files or systems that store Avro data should always include the writer's schema for that data. Avro-based remote procedure call (RPC) systems must also guarantee that remote recipients of data have a copy of the schema used to write that data. In general, it is advisable that any reader of Avro data should use a schema that is the same (as defined more fully in [Parsing Canonical Form for Schemas]({{< ref "#parsing-canonical-form-for-schemas" >}} "Parsing Canonical Form for Schemas")) as the schema that was used to write the data in order to deserialize it correctly. Deserializing data into a newer schema is accomplished by specifying an additional schema, the results of which are described in [Schema Resolution]({{< ref "#schema-resolution" >}}). + +In general, both serialization and deserialization proceed as a depth-first, left-to-right traversal of the schema, serializing or deserializing primitive types as they are encountered. Therefore, it is possible, though not advisable, to read Avro data with a schema that does not have the same Parsing Canonical Form as the schema with which the data was written. In order for this to work, the serialized primitive values must be compatible, in order value by value, with the items in the deserialization schema. For example, int and long are always serialized the same way, so an int could be deserialized as a long. Since the compatibility of two schemas depends on both the data and the serialization format (eg. binary is more permissive than JSON because JSON includes field names, eg. a long that is too large will overflow an int), it is simpler and more reliable to use schemas with identical Parsing Canonical Form. + +### Encodings +Avro specifies two serialization encodings: binary and JSON. Most applications will use the binary encoding, as it is smaller and faster. But, for debugging and web-based applications, the JSON encoding may sometimes be appropriate. + +### Binary Encoding {#binary-encoding} +Binary encoding does not include field names, self-contained information about the types of individual bytes, nor field or record separators. Therefore readers are wholly reliant on the schema used when the data was encoded. + +#### Primitive Types +Primitive types are encoded in binary as follows: + +* _null_ is written as zero bytes. +* a _boolean_ is written as a single byte whose value is either 0 (false) or 1 (true). +* _int_ and _long_ values are written using [variable-length](https://lucene.apache.org/java/3_5_0/fileformats.html#VInt) [zig-zag](https://code.google.com/apis/protocolbuffers/docs/encoding.html#types) coding. Some examples: + +| *value* | *hex* | +|---|---| +| 0 | 00 | +|-1 | 01 | +| 1 | 02 | +|-2 | 03 | +| 2 | 04 | +|...|...| +|-64 | 7f | +|64 | 80 01| +|...|...| + +* a _float_ is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java's [floatToIntBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Float.html#floatToIntBits-float-) and then encoded in little-endian format. +* a _double_ is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java's [doubleToLongBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#doubleToLongBits-double-) and then encoded in little-endian format. +* _bytes_ are encoded as a long followed by that many bytes of data. +* a _string_ is encoded as a long followed by that many bytes of UTF-8 encoded character data. +For example, the three-character string "foo" would be encoded as the long value 3 (encoded as hex 06) followed by the UTF-8 encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f): +``` +06 66 6f 6f +``` + +### Complex Types +Complex types are encoded in binary as follows: + +#### Records +A record is encoded by encoding the values of its fields in the order that they are declared. In other words, a record is encoded as just the concatenation of the encodings of its fields. Field values are encoded per their schema. + +For example, the record schema +```json +{ + "type": "record", + "name": "test", + "fields" : [ + {"name": "a", "type": "long"}, + {"name": "b", "type": "string"} + ] +} +``` + +An instance of this record whose a field has value 27 (encoded as hex 36) and whose b field has value "foo" (encoded as hex bytes 06 66 6f 6f), would be encoded simply as the concatenation of these, namely the hex byte sequence: +``` +36 06 66 6f 6f +``` + +#### Enums +An enum is encoded by a int, representing the zero-based position of the symbol in the schema. + +For example, consider the enum: +```json +{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] } +``` + +This would be encoded by an int between zero and three, with zero indicating "A", and 3 indicating "D". + +#### Arrays +Arrays are encoded as a series of blocks. Each block consists of a long count value, followed by that many array items. A block with count zero indicates the end of the array. Each item is encoded per the array's item schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a long block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +For example, the array schema +```json +{"type": "array", "items": "long"} +``` +an array containing the items 3 and 27 could be encoded as the long value 2 (encoded as hex 04) followed by long values 3 and 27 (encoded as hex 06 36) terminated by zero: +``` +04 06 36 00 +``` + +The blocked representation permits one to read and write arrays larger than can be buffered in memory, since one can start writing items without knowing the full length of the array. + +#### Maps {#schema-maps} +Maps are encoded as a series of _blocks_. Each block consists of a `long` _count_ value, followed by that many key/value pairs. A block with count zero indicates the end of the map. Each item is encoded per the map's value schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a `long` block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +The blocked representation permits one to read and write maps larger than can be buffered in memory, since one can start writing items without knowing the full length of the map. + +#### Unions +A union is encoded by first writing an `int` value indicating the zero-based position within the union of the schema of its value. The value is then encoded per the indicated schema within the union. + +For example, the union schema `["null","string"]` would encode: + +* _null_ as zero (the index of "null" in the union): +`00` +* the string "a" as one (the index of "string" in the union, 1, encoded as hex 02), followed by the serialized string: +`02 02 61` +NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. In reality, we don't expect unions with 215M members + +#### Fixed +Fixed instances are encoded using the number of bytes declared in the schema. + +### JSON Encoding +Except for unions, the JSON encoding is the same as is used to encode [field default values]({{< ref "#schema-record" >}}). + +The value of a union is encoded in JSON as follows: + +* if its type is _null_, then it is encoded as a JSON _null_; +* otherwise it is encoded as a JSON object with one name/value pair whose name is the type's name and whose value is the recursively encoded value. For Avro's named types (record, fixed or enum) the user-specified name is used, for other types the type name is used. + +For example, the union schema `["null","string","Foo"]`, where Foo is a record name, would encode: + +* _null_ as _null_; +* the string "a" as `{"string": "a"}` and +* a Foo instance as `{"Foo": {...}}`, where `{...}` indicates the JSON encoding of a Foo instance. + +Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc. + +### Single-object encoding +In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic. + +In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively. + +#### Single object encoding specification +Single Avro objects are encoded as follows: + +1. A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1). +1. The 8-byte little-endian CRC-64-AVRO [fingerprint]({{< ref "#schema-fingerprints" >}} "Schema fingerprints") of the object's schema. +1. The Avro object encoded using [Avro's binary encoding]({{< ref "#binary-encoding" >}}). + +Implementations use the 2-byte marker to determine whether a payload is Avro. This check helps avoid expensive lookups that resolve the schema from a fingerprint, when the message is not an encoded Avro payload. + +## Sort Order +Avro defines a standard sort order for data. This permits data written by one system to be efficiently sorted by another system. This can be an important optimization, as sort order comparisons are sometimes the most frequent per-object operation. Note also that Avro binary-encoded data can be efficiently ordered without deserializing it to objects. + +Data items may only be compared if they have identical schemas. Pairwise comparisons are implemented recursively with a depth-first, left-to-right traversal of the schema. The first mismatch encountered determines the order of the items. + +Two items with the same schema are compared according to the following rules. + +* _null_ data is always equal. +* _boolean_ data is ordered with false before true. +* _int_, _long_, _float_ and _double_ data is ordered by ascending numeric value. +* _bytes_ and fixed data are compared lexicographically by unsigned 8-bit values. +* _string_ data is compared lexicographically by Unicode code point. Note that since UTF-8 is used as the binary encoding for strings, sorting of bytes and string binary data is identical. +* _array_ data is compared lexicographically by element. +* _enum_ data is ordered by the symbol's position in the enum schema. For example, an enum whose symbols are `["z", "a"]` would sort "z" values before "a" values. +* _union_ data is first ordered by the branch within the union, and, within that, by the type of the branch. For example, an `["int", "string"]` union would order all int values before all string values, with the ints and strings themselves ordered as defined above. +* _record_ data is ordered lexicographically by field. If a field specifies that its order is: + * "ascending", then the order of its values is unaltered. + * "descending", then the order of its values is reversed. + * "ignore", then its values are ignored when sorting. +* _map_ data may not be compared. It is an error to attempt to compare data containing maps unless those maps are in an `"order":"ignore"` record field. + +## Object Container Files +Avro includes a simple object container file format. A file has a schema, and all objects stored in the file must be written according to that schema, using binary encoding. Objects are stored in blocks that may be compressed. Syncronization markers are used between blocks to permit efficient splitting of files for MapReduce processing. + +Files may include arbitrary user-specified metadata. + +A file consists of: + +* A file header, followed by +* one or more file data blocks. + +A file header consists of: + +* Four bytes, ASCII 'O', 'b', 'j', followed by 1. +* file metadata, including the schema. +* The 16-byte, randomly-generated sync marker for this file. + +File metadata is written as if defined by the following [map]({{< ref "#schema-maps" >}}) schema: +```json +{"type": "map", "values": "bytes"} +``` +All metadata properties that start with "avro." are reserved. The following file metadata properties are currently used: + +* **avro.schema** contains the schema of objects stored in the file, as JSON data (required). +* **avro.codec** the name of the compression codec used to compress blocks, as a string. Implementations are required to support the following codecs: "null" and "deflate". If codec is absent, it is assumed to be "null". The codecs are described with more detail below. + +A file header is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.Header", + "fields" : [ + {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, + {"name": "meta", "type": {"type": "map", "values": "bytes"}}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +A file data block consists of: + +* A long indicating the count of objects in this block. +* A long indicating the size in bytes of the serialized objects in the current block, after any codec is applied +* The serialized objects. If a codec is specified, this is compressed by that codec. +* The file's 16-byte sync marker. + +A file data block is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.DataBlock", + "fields" : [ + {"name": "count", "type": "long"}, + {"name": "data", "type": "bytes"}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +Each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enable detection of corrupt blocks and help ensure data integrity. + +### Required Codecs + +_null_ + +The "null" codec simply passes through data uncompressed. + +_deflate_ + +The "deflate" codec writes the data block using the deflate algorithm as specified in [RFC 1951](https://www.isi.edu/in-notes/rfc1951.txt), and typically implemented using the zlib library. Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum. + +### Optional Codecs +_bzip2_ + +The "bzip2" codec uses the [bzip2](https://sourceware.org/bzip2/) compression library. + +_snappy_ + +The "snappy" codec uses Google's [Snappy](https://code.google.com/p/snappy/) compression library. Each compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in the block. + +_xz_ + +The "xz" codec uses the [XZ](https://tukaani.org/xz/) compression library. + +_zstandard_ + +The "zstandard" codec uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. + +### Protocol Declaration +Avro protocols describe RPC interfaces. Like schemas, they are defined with JSON text. + +A protocol is a JSON object with the following attributes: + +* _protocol_, a string, the name of the protocol (required); +* _namespace_, an optional string that qualifies the name (optional); +* _doc_, an optional string describing this protocol; +* _types_, an optional list of definitions of named types (records, enums, fixed and errors). An error definition is just like a record definition except it uses "error" instead of "record". Note that forward references to named types are not permitted. +* _messages_, an optional JSON object whose keys are message names and whose values are objects whose attributes are described below. No two messages may have the same name. + +The name and namespace qualification rules defined for schema objects apply to protocols as well. + +### Messages +A message has attributes: + +* a _doc_, an optional description of the message, +* a _request_, a list of named, typed parameter schemas (this has the same form as the fields of a record declaration); +* a _response_ schema; +* an optional union of declared error schemas. The effective union has "string" prepended to the declared union, to permit transmission of undeclared "system" errors. For example, if the declared error union is `["AccessError"]`, then the effective union is `["string", "AccessError"]`. When no errors are declared, the effective error union is `["string"]`. Errors are serialized using the effective union; however, a protocol's JSON declaration contains only the declared union. +* an optional one-way boolean parameter. + +A request parameter list is processed equivalently to an anonymous record. Since record field lists may vary between reader and writer, request parameters may also differ between the caller and responder, and such differences are resolved in the same manner as record field differences. + +The one-way parameter may only be true when the response type is `"null"` and no errors are listed. + +### Sample Protocol +For example, one may define a simple HelloWorld protocol with: +```json +{ + "namespace": "com.acme", + "protocol": "HelloWorld", + "doc": "Protocol Greetings", + + "types": [ + {"name": "Greeting", "type": "record", "fields": [ + {"name": "message", "type": "string"}]}, + {"name": "Curse", "type": "error", "fields": [ + {"name": "message", "type": "string"}]} + ], + + "messages": { + "hello": { + "doc": "Say hello.", + "request": [{"name": "greeting", "type": "Greeting" }], + "response": "Greeting", + "errors": ["Curse"] + } + } +} +``` + +## Protocol Wire Format + +### Message Transport +Messages may be transmitted via different transport mechanisms. + +To the transport, a _message_ is an opaque byte sequence. + +A transport is a system that supports: + +* **transmission of request messages** +* **receipt of corresponding response messages** +Servers may send a response message back to the client corresponding to a request message. The mechanism of correspondence is transport-specific. For example, in HTTP it is implicit, since HTTP directly supports requests and responses. But a transport that multiplexes many client threads over a single socket would need to tag messages with unique identifiers. + +Transports may be either stateless or stateful. In a stateless transport, messaging assumes no established connection state, while stateful transports establish connections that may be used for multiple messages. This distinction is discussed further in the [handshake](#handshake) section below. + +#### HTTP as Transport +When [HTTP](https://www.w3.org/Protocols/rfc2616/rfc2616.html) is used as a transport, each Avro message exchange is an HTTP request/response pair. All messages of an Avro protocol should share a single URL at an HTTP server. Other protocols may also use that URL. Both normal and error Avro response messages should use the 200 (OK) response code. The chunked encoding may be used for requests and responses, but, regardless the Avro request and response are the entire content of an HTTP request and response. The HTTP Content-Type of requests and responses should be specified as "avro/binary". Requests should be made using the POST method. + +HTTP is used by Avro as a stateless transport. + +### Message Framing +Avro messages are _framed_ as a list of buffers. + +Framing is a layer between messages and the transport. It exists to optimize certain operations. + +The format of framed message data is: + +* a series of buffers, where each buffer consists of: + * a four-byte, big-endian _buffer length_, followed by + * that many bytes of _buffer_ data. +* a message is always terminated by a zero-length buffer. + +Framing is transparent to request and response message formats (described below). Any message may be presented as a single or multiple buffers. + +Framing can permit readers to more efficiently get different buffers from different sources and for writers to more efficiently store different buffers to different destinations. In particular, it can reduce the number of times large binary objects are copied. For example, if an RPC parameter consists of a megabyte of file data, that data can be copied directly to a socket from a file descriptor, and, on the other end, it could be written directly to a file descriptor, never entering user space. + +A simple, recommended, framing policy is for writers to create a new segment whenever a single binary object is written that is larger than a normal output buffer. Small objects are then appended in buffers, while larger objects are written as their own buffers. When a reader then tries to read a large object the runtime can hand it an entire buffer directly, without having to copy it. + +### Handshake +The purpose of the handshake is to ensure that the client and the server have each other's protocol definition, so that the client can correctly deserialize responses, and the server can correctly deserialize requests. Both clients and servers should maintain a cache of recently seen protocols, so that, in most cases, a handshake will be completed without extra round-trip network exchanges or the transmission of full protocol text. + +RPC requests and responses may not be processed until a handshake has been completed. With a stateless transport, all requests and responses are prefixed by handshakes. With a stateful transport, handshakes are only attached to requests and responses until a successful handshake response has been returned over a connection. After this, request and response payloads are sent without handshakes for the lifetime of that connection. + +The handshake process uses the following record schemas: +```json +{ + "type": "record", + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +{ + "type": "record", + "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", + "type": {"type": "enum", "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", + "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +``` + +* A client first prefixes each request with a `HandshakeRequest` containing just the hash of its protocol and of the server's protocol (`clientHash!=null, clientProtocol=null, serverHash!=null`), where the hashes are 128-bit MD5 hashes of the JSON protocol text. If a client has never connected to a given server, it sends its hash as a guess of the server's hash, otherwise it sends the hash that it previously obtained from this server. +The server responds with a HandshakeResponse containing one of: + * `match=BOTH, serverProtocol=null, serverHash=null` if the client sent the valid hash of the server's protocol and the server knows what protocol corresponds to the client's hash. In this case, the request is complete and the response data immediately follows the HandshakeResponse. + * `match=CLIENT, serverProtocol!=null, serverHash!=null` if the server has previously seen the client's protocol, but the client sent an incorrect hash of the server's protocol. The request is complete and the response data immediately follows the HandshakeResponse. The client must use the returned protocol to process the response and should also cache that protocol and its hash for future interactions with this server. + * `match=NONE` if the server has not previously seen the client's protocol. The serverHash and serverProtocol may also be non-null if the server's protocol hash was incorrect. +In this case the client must then re-submit its request with its protocol text (`clientHash!=null, clientProtocol!=null, serverHash!=null`) and the server should respond with a successful match (match=BOTH, serverProtocol=null, serverHash=null) as above. + +The meta field is reserved for future handshake enhancements. + +### Call Format +A _call_ consists of a request message paired with its resulting response or error message. Requests and responses contain extensible metadata, and both kinds of messages are framed as described above. + +The format of a call request is: + +* _request metadata_, a map with values of type bytes +* the _message name_, an Avro string, followed by +* the _message parameters_. Parameters are serialized according to the message's request declaration. +When the empty string is used as a message name a server should ignore the parameters and return an empty response. A client may use this to ping a server or to perform a handshake without sending a protocol message. + +When a message is declared one-way and a stateful connection has been established by a successful handshake response, no response data is sent. Otherwise the format of the call response is: + +* _response metadata_, a map with values of type bytes +* a one-byte error _flag_ boolean, followed by either: + * if the error flag is false, the message _response_, serialized per the message's response schema. + * if the error flag is true, the _error_, serialized per the message's effective error union schema. + +### Schema Resolution {#schema-resolution} +A reader of Avro data, whether from an RPC or a file, can always parse that data because the original schema must be provided along with the data. However, the reader may be programmed to read data into a different schema. For example, if the data was written with a different version of the software than it is read, then fields may have been added or removed from records. This section specifies how such schema differences should be resolved. + +We refer to the schema used to write the data as the writer's schema, and the schema that the application expects the reader's schema. Differences between these should be resolved as follows: + +* It is an error if the two schemas do not _match_. +To match, one of the following must hold: + * both schemas are arrays whose item types match + * both schemas are maps whose value types match + * both schemas are enums whose (unqualified) names match + * both schemas are fixed whose sizes and (unqualified) names match + * both schemas are records with the same (unqualified) name + * either schema is a union + * both schemas have same primitive type + * the writer's schema may be promoted to the reader's as follows: + * int is promotable to long, float, or double + * long is promotable to float or double + * float is promotable to double + * string is promotable to bytes + * bytes is promotable to string +* **if both are records**: + * the ordering of fields may be different: fields are matched by name. + * schemas for fields with the same name in both records are resolved recursively. + * if the writer's record contains a field with a name not present in the reader's record, the writer's value for that field is ignored. + * if the reader's record schema has a field that contains a default value, and writer's schema does not have a field with the same name, then the reader should use the default value from its field. + * if the reader's record schema has a field with no default value, and writer's schema does not have a field with the same name, an error is signalled. +* **if both are enums**: +if the writer's symbol is not present in the reader's enum and the reader has a default value, then that value is used, otherwise an error is signalled. + +* **if both are arrays**: +This resolution algorithm is applied recursively to the reader's and writer's array item schemas. + +* **if both are maps**: +This resolution algorithm is applied recursively to the reader's and writer's value schemas. + +* **if both are unions**: +The first schema in the reader's union that matches the selected writer's union schema is recursively resolved against it. if none match, an error is signalled. + +* **if reader's is a union, but writer's is not** +The first schema in the reader's union that matches the writer's schema is recursively resolved against it. If none match, an error is signalled. + +* **if writer's is a union, but reader's is not** +If the reader's schema matches the selected writer's schema, it is recursively resolved against it. If they do not match, an error is signalled. + +A schema's _doc_ fields are ignored for the purposes of schema resolution. Hence, the _doc_ portion of a schema may be dropped at serialization. + +### Parsing Canonical Form for Schemas {#parsing-canonical-form-for-schemas} +One of the defining characteristics of Avro is that a reader must use the schema used by the writer of the data in order to know how to read the data. This assumption results in a data format that's compact and also amenable to many forms of schema evolution. However, the specification so far has not defined what it means for the reader to have the "same" schema as the writer. Does the schema need to be textually identical? Well, clearly adding or removing some whitespace to a JSON expression does not change its meaning. At the same time, reordering the fields of records clearly does change the meaning. So what does it mean for a reader to have "the same" schema as a writer? + +Parsing Canonical Form is a transformation of a writer's schema that let's us define what it means for two schemas to be "the same" for the purpose of reading data written against the schema. It is called Parsing Canonical Form because the transformations strip away parts of the schema, like "doc" attributes, that are irrelevant to readers trying to parse incoming data. It is called Canonical Form because the transformations normalize the JSON text (such as the order of attributes) in a way that eliminates unimportant differences between schemas. If the Parsing Canonical Forms of two different schemas are textually equal, then those schemas are "the same" as far as any reader is concerned, i.e., there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writing using the other original schema. (We sketch a proof of this property in a companion document.) + +The next subsection specifies the transformations that define Parsing Canonical Form. But with a well-defined canonical form, it can be convenient to go one step further, transforming these canonical forms into simple integers ("fingerprints") that can be used to uniquely identify schemas. The subsection after next recommends some standard practices for generating such fingerprints. + +#### Transforming into Parsing Canonical Form +Assuming an input schema (in JSON form) that's already UTF-8 text for a _valid_ Avro schema (including all quotes as required by JSON), the following transformations will produce its Parsing Canonical Form: + +* [PRIMITIVES] Convert primitive schemas to their simple form (e.g., int instead of `{"type":"int"}`). +* [FULLNAMES] Replace short names with fullnames, using applicable namespaces to do so. Then eliminate namespace attributes, which are now redundant. +* [STRIP] Keep only attributes that are relevant to parsing data, which are: _type_, _name_, _fields_, _symbols_, _items_, _values_, _size_. Strip all others (e.g., _doc_ and _aliases_). +* [ORDER] Order the appearance of fields of JSON objects as follows: _name_, _type_, _fields_, _symbols_, _items_, _values_, _size_. For example, if an object has _type_, _name_, and _size_ fields, then the _name_ field should appear first, followed by the _type_ and then the _size_ fields. +* [STRINGS] For all JSON string literals in the schema text, replace any escaped characters (e.g., \uXXXX escapes) with their UTF-8 equivalents. +* [INTEGERS] Eliminate quotes around and any leading zeros in front of JSON integer literals (which appear in the _size_ attributes of _fixed_ schemas). +* [WHITESPACE] Eliminate all whitespace in JSON outside of string literals. + +#### Schema Fingerprints {#schema-fingerprints} +"[A] fingerprinting algorithm is a procedure that maps an arbitrarily large data item (such as a computer file) to a much shorter bit string, its fingerprint, that uniquely identifies the original data for all practical purposes" (quoted from [Wikipedia](https://en.wikipedia.org/wiki/Fingerprint_(computing))). In the Avro context, fingerprints of Parsing Canonical Form can be useful in a number of applications; for example, to cache encoder and decoder objects, to tag data items with a short substitute for the writer's full schema, and to quickly negotiate common-case schemas between readers and writers. + +In designing fingerprinting algorithms, there is a fundamental trade-off between the length of the fingerprint and the probability of collisions. To help application designers find appropriate points within this trade-off space, while encouraging interoperability and ease of implementation, we recommend using one of the following three algorithms when fingerprinting Avro schemas: + +* When applications can tolerate longer fingerprints, we recommend using the [SHA-256 digest algorithm](https://en.wikipedia.org/wiki/SHA-2) to generate 256-bit fingerprints of Parsing Canonical Forms. Most languages today have SHA-256 implementations in their libraries. +* At the opposite extreme, the smallest fingerprint we recommend is a 64-bit [Rabin fingerprint](https://en.wikipedia.org/wiki/Rabin_fingerprint). Below, we provide pseudo-code for this algorithm that can be easily translated into any programming language. 64-bit fingerprints should guarantee uniqueness for schema caches of up to a million entries (for such a cache, the chance of a collision is 3E-8). We don't recommend shorter fingerprints, as the chances of collisions is too great (for example, with 32-bit fingerprints, a cache with as few as 100,000 schemas has a 50% chance of having a collision). +* Between these two extremes, we recommend using the [MD5 message digest](https://en.wikipedia.org/wiki/MD5) to generate 128-bit fingerprints. These make sense only where very large numbers of schemas are being manipulated (tens of millions); otherwise, 64-bit fingerprints should be sufficient. As with SHA-256, MD5 implementations are found in most libraries today. + +These fingerprints are not meant to provide any security guarantees, even the longer SHA-256-based ones. Most Avro applications should be surrounded by security measures that prevent attackers from writing random data and otherwise interfering with the consumers of schemas. We recommend that these surrounding mechanisms be used to prevent collision and pre-image attacks (i.e., "forgery") on schema fingerprints, rather than relying on the security properties of the fingerprints themselves. + +Rabin fingerprints are [cyclic redundancy checks](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) computed using irreducible polynomials. In the style of the Appendix of [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt) (pg 10), which defines the CRC-32 algorithm, here's our definition of the 64-bit AVRO fingerprinting algorithm: +```java +long fingerprint64(byte[] buf) { + if (FP_TABLE == null) initFPTable(); + long fp = EMPTY; + for (int i = 0; i < buf.length; i++) + fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff]; + return fp; +} + +static long EMPTY = 0xc15d213aa4d7a795L; +static long[] FP_TABLE = null; + +void initFPTable() { + FP_TABLE = new long[256]; + for (int i = 0; i < 256; i++) { + long fp = i; + for (int j = 0; j < 8; j++) + fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L)); + FP_TABLE[i] = fp; + } +} +``` + +Readers interested in the mathematics behind this algorithm may want to read [Chapter 14 of the Second Edition of Hacker's Delight](https://books.google.com/books?id=XD9iAwAAQBAJ&pg=PA319). (Unlike RFC-1952 and the book chapter, we prepend a single one bit to messages. We do this because CRCs ignore leading zero bits, which can be problematic. Our code prepends a one-bit by initializing fingerprints using EMPTY, rather than initializing using zero as in RFC-1952 and the book chapter.) + +## Logical Types +A logical type is an Avro primitive or complex type with extra attributes to represent a derived type. The attribute `logicalType` must always be present for a logical type, and is a string with the name of one of the logical types listed later in this section. Other attributes may be defined for particular logical types. + +A logical type is always serialized using its underlying Avro type so that values are encoded in exactly the same way as the equivalent Avro type that does not have a `logicalType` attribute. Language implementations may choose to represent logical types with an appropriate native type, although this is not required. + +Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type. + +### Decimal +The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled × 10-scale_. + +A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute. + +The following attributes are supported: + +* _scale_, a JSON integer representing the scale (optional). If not specified the scale is 0. +* _precision_, a JSON integer representing the (maximum) precision of decimals stored in this type (required). +For example, the following schema represents decimal numbers with a maximum precision of 4 and a scale of 2: +```json +{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 +} +``` +Precision must be a positive integer greater than zero. If the underlying type is a _fixed_, then the precision is limited by its size. An array of length n can store at most _floor(log10(28 × n - 1 - 1))_ base-10 digits of precision. + +Scale must be zero or a positive integer less than or equal to the precision. + +For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match. + +### UUID +The `uuid` logical type represents a random generated universally unique identifier (UUID). + +A `uuid` logical type annotates an Avro `string`. The string has to conform with [RFC-4122](https://www.ietf.org/rfc/rfc4122.txt) + +### Date +The `date` logical type represents a date within the calendar, with no reference to a particular time zone or time of day. + +A `date` logical type annotates an Avro `int`, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar). + +The following schema represents a date: +```json +{ + "type": "int", + "logicalType": "date" +} +``` + +### Time (millisecond precision) +The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. + +A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000. + +### Time (microsecond precision) +The `time-micros` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond. + +A `time-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds after midnight, 00:00:00.000000. + +### Timestamp (millisecond precision) +The `timestamp-millis` logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond. Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +A `timestamp-millis` logical type annotates an Avro `long`, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC. + +### Timestamp (microsecond precision) +The `timestamp-micros` logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one microsecond. Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +A `timestamp-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC. + +### Local timestamp (millisecond precision) +The `local-timestamp-millis` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one millisecond. + +A `local-timestamp-millis` logical type annotates an Avro `long`, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. + +### Local timestamp (microsecond precision) +The `local-timestamp-micros` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one microsecond. + +A `local-timestamp-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds, from 1 January 1970 00:00:00.000000. + +### Duration +The `duration` logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. + +A `duration` logical type annotates Avro `fixed` type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds. diff --git a/doc/content/en/docs/1.11.4/_index.md b/doc/content/en/docs/1.11.4/_index.md new file mode 100755 index 00000000000..89513a1b6c2 --- /dev/null +++ b/doc/content/en/docs/1.11.4/_index.md @@ -0,0 +1,59 @@ +--- +title: "Apache Avroâ„ĸ 1.11.4 Documentation" +linkTitle: "1.11.4" +type: docs +weight: -1114 +--- + + + +## Introduction + +Apache Avroâ„ĸ is a data serialization system. + +Avro provides: + +* Rich data structures. +* A compact, fast, binary data format. +* A container file, to store persistent data. +* Remote procedure call (RPC). +* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation as an optional optimization, only worth implementing for statically typed languages. + +## Schemas + +Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing. + +When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present. + +When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since both client and server both have the other's full schema, correspondence between same named fields, missing fields, extra fields, etc. can all be easily resolved. + +Avro schemas are defined with JSON . This facilitates implementation in languages that already have JSON libraries. + +## Comparison with other systems + +Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects. + +* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. +* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. +* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names. + + diff --git a/doc/content/en/docs/1.11.4/api-c++.md b/doc/content/en/docs/1.11.4/api-c++.md new file mode 100644 index 00000000000..110508cebdb --- /dev/null +++ b/doc/content/en/docs/1.11.4/api-c++.md @@ -0,0 +1,29 @@ +--- +title: "C++ API" +linkTitle: "C++ API" +weight: 102 +manualLink: /docs/1.11.4/api/cpp/html/ +--- + + + +The C++ API documentation can be found here. diff --git a/doc/content/en/docs/1.11.4/api-c.md b/doc/content/en/docs/1.11.4/api-c.md new file mode 100644 index 00000000000..91b20b3bade --- /dev/null +++ b/doc/content/en/docs/1.11.4/api-c.md @@ -0,0 +1,29 @@ +--- +title: "C API" +linkTitle: "C API" +weight: 101 +manualLink: /docs/1.11.4/api/c/ +--- + + + +The C API documentation can be found here. diff --git a/doc/content/en/docs/1.11.4/api-csharp.md b/doc/content/en/docs/1.11.4/api-csharp.md new file mode 100644 index 00000000000..ca56d9189ea --- /dev/null +++ b/doc/content/en/docs/1.11.4/api-csharp.md @@ -0,0 +1,29 @@ +--- +title: "C# API" +linkTitle: "C# API" +weight: 103 +manualLink: /docs/1.11.4/api/csharp/html/ +--- + + + +The C# API documentation can be found here. diff --git a/doc/content/en/docs/1.11.4/api-java.md b/doc/content/en/docs/1.11.4/api-java.md new file mode 100644 index 00000000000..1dc9568b58b --- /dev/null +++ b/doc/content/en/docs/1.11.4/api-java.md @@ -0,0 +1,29 @@ +--- +title: "Java API" +linkTitle: "Java API" +weight: 100 +manualLink: /docs/1.11.4/api/java/ +--- + + + +The Javadocs can be found here. diff --git a/doc/content/en/docs/1.11.4/api-py.md b/doc/content/en/docs/1.11.4/api-py.md new file mode 100644 index 00000000000..c43ec8ce3d9 --- /dev/null +++ b/doc/content/en/docs/1.11.4/api-py.md @@ -0,0 +1,29 @@ +--- +title: "Python API" +linkTitle: "Python API" +weight: 104 +manualLink: /docs/1.11.4/api/py/html/ +--- + + + +The Python API documentation can be found here. diff --git a/doc/content/en/docs/1.11.4/logo.svg b/doc/content/en/docs/1.11.4/logo.svg new file mode 100644 index 00000000000..b44ed197262 --- /dev/null +++ b/doc/content/en/docs/1.11.4/logo.svg @@ -0,0 +1,22 @@ + + + diff --git a/doc/content/en/docs/1.12.0/Getting started (Java)/_index.md b/doc/content/en/docs/1.12.0/Getting started (Java)/_index.md new file mode 100644 index 00000000000..adf77dd3598 --- /dev/null +++ b/doc/content/en/docs/1.12.0/Getting started (Java)/_index.md @@ -0,0 +1,289 @@ +--- +categories: [] +tags: ["java"] +title: "Getting Started (Java)" +linkTitle: "Getting Started (Java)" +weight: 2 +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Java. This guide only covers using Avro for data serialization; see Patrick Hunt's [Avro RPC Quick Start](https://github.com/phunt/avro-rpc-quickstart) for a good introduction to using Avro for RPC. + +## Download + +Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the [Apache Avroâ„ĸ Download]({{< relref "/project/download" >}}) page. This guide uses Avro 1.12.0, the latest version at the time of writing. For the examples in this guide, download avro-1.12.0.jar and avro-tools-1.12.0.jar. + +Alternatively, if you are using Maven, add the following dependency to your POM: + +```xml + + org.apache.avro + avro + 1.12.0 + +``` + +As well as the Avro Maven plugin (for performing code generation): + +```xml + + org.apache.avro + avro-maven-plugin + 1.12.0 + + ${project.basedir}/src/main/avro/ + ${project.basedir}/src/main/java/ + + + + generate-sources + + schema + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + +``` + +You may also build the required Avro jars from source. Building Avro is beyond the scope of this guide; see the Build Documentation page in the wiki for more information. + +## Defining a schema + +Avro schemas are defined using JSON or IDL (the latter requires an extra dependency). Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing with code generation + +### Compiling the schema +Code generation allows us to automatically create classes based on our previously-defined schema. Once we have defined the relevant classes, there is no need to use the schema directly in our programs. We use the avro-tools jar to generate code as follows: + +```shell +java -jar /path/to/avro-tools-1.12.0.jar compile schema +``` + +This will generate the appropriate source files in a package based on the schema's namespace in the provided destination folder. For instance, to generate a User class in package example.avro from the schema defined above, run + +```shell +java -jar /path/to/avro-tools-1.12.0.jar compile schema user.avsc . +``` + +Note that if you using the Avro Maven plugin, there is no need to manually invoke the schema compiler; the plugin automatically performs code generation on any .avsc files present in the configured source directory. + +### Creating Users +Now that we've completed the code generation, let's create some Users, serialize them to a data file on disk, and then read back the file and deserialize the User objects. + +First let's create some Users and set their fields. + +```java +User user1 = new User(); +user1.setName("Alyssa"); +user1.setFavoriteNumber(256); +// Leave favorite color null + +// Alternate constructor +User user2 = new User("Ben", 7, "red"); + +// Construct via builder +User user3 = User.newBuilder() + .setName("Charlie") + .setFavoriteColor("blue") + .setFavoriteNumber(null) + .build(); +``` + +As shown in this example, Avro objects can be created either by invoking a constructor directly or by using a builder. Unlike constructors, builders will automatically set any default values specified in the schema. Additionally, builders validate the data as it set, whereas objects constructed directly will not cause an error until the object is serialized. However, using constructors directly generally offers better performance, as builders create a copy of the datastructure before it is written. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. Similarly, we set user3's favorite number to null (using a builder requires setting all fields, even if they are null). + +### Serializing +Now let's serialize our Users to disk. + +```java +// Serialize user1, user2 and user3 to disk +DatumWriter userDatumWriter = new SpecificDatumWriter(User.class); +DataFileWriter dataFileWriter = new DataFileWriter(userDatumWriter); +dataFileWriter.create(user1.getSchema(), new File("users.avro")); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.append(user3); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. The SpecificDatumWriter class is used with generated classes and extracts the schema from the specified generated type. + +Next we create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, let's deserialize the data file we just created. + +```java +// Deserialize Users from disk +DatumReader userDatumReader = new SpecificDatumReader(User.class); +DataFileReader dataFileReader = new DataFileReader(file, userDatumReader); +User user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +} +``` + +This snippet will output: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"} +``` + +Deserializing is very similar to serializing. We create a SpecificDatumReader, analogous to the SpecificDatumWriter we used in serialization, which converts in-memory serialized items into instances of our generated class, in this case User. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file and the schema provided by the reader, in this case the User class. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next we use the DataFileReader to iterate through the serialized Users and print the deserialized object to stdout. Note how we perform the iteration: we create a single User object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same User object rather than allocating a new User for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (User user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile # includes code generation via Avro Maven plugin +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain +``` + +### Beta feature: Generating faster code +In release 1.9.0, we introduced a new approach to generating code that speeds up decoding of objects by more than 10% and encoding by more than 30% (future performance enhancements are underway). To ensure a smooth introduction of this change into production systems, this feature is controlled by a feature flag, the system property org.apache.avro.specific.use_custom_coders. In this first release, this feature is off by default. To turn it on, set the system flag to true at runtime. In the sample above, for example, you could enable the fater coders as follows: + +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \ + -Dorg.apache.avro.specific.use_custom_coders=true + +Note that you do not have to recompile your Avro schema to have access to this feature. The feature is compiled and built into your code, and you turn it on and off at runtime using the feature flag. As a result, you can turn it on during testing, for example, and then off in production. Or you can turn it on in production, and quickly turn it off if something breaks. + +We encourage the Avro community to exercise this new feature early to help build confidence. (For those paying on demand for compute resources in the cloud, it can lead to meaningful cost savings.) As confidence builds, we will turn this feature on by default, and eventually eliminate the feature flag (and the old code). + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. + +Let's go over the same example as in the previous section, but without using code generation: we'll create some users, serialize them to a data file on disk, and then read back the file and deserialize the users objects. + +### Creating users +First, we use a SchemaParser to read our schema definition and create a Schema object. + +```java +Schema schema = new SchemaParser().parse(new File("user.avsc")).mainSchema(); +``` + +Using this schema, let's create some users. + +```java +GenericRecord user1 = new GenericData.Record(schema); +user1.put("name", "Alyssa"); +user1.put("favorite_number", 256); +// Leave favorite color null + +GenericRecord user2 = new GenericData.Record(schema); +user2.put("name", "Ben"); +user2.put("favorite_number", 7); +user2.put("favorite_color", "red"); +``` + +Since we're not using code generation, we use GenericRecords to represent users. GenericRecord uses the schema to verify that we only specify valid fields. If we try to set a non-existent field (e.g., user1.put("favorite_animal", "cat")), we'll get an AvroRuntimeException when we run the program. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. + +### Serializing +Now that we've created our user objects, serializing and deserializing them is almost identical to the example above which uses code generation. The main difference is that we use generic instead of specific readers and writers. + +First we'll serialize our users to a data file on disk. + +```java +// Serialize user1 and user2 to disk +File file = new File("users.avro"); +DatumWriter datumWriter = new GenericDatumWriter(schema); +DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); +dataFileWriter.create(schema, file); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. Since we are not using code generation, we create a GenericDatumWriter. It requires the schema both to determine how to write the GenericRecords and to verify that all non-nullable fields are present. + +As in the code generation example, we also create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, we'll deserialize the data file we just created. + +```java +// Deserialize users from disk +DatumReader datumReader = new GenericDatumReader(schema); +DataFileReader dataFileReader = new DataFileReader(file, datumReader); +GenericRecord user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +``` + +This outputs: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +``` + +Deserializing is very similar to serializing. We create a GenericDatumReader, analogous to the GenericDatumWriter we used in serialization, which converts in-memory serialized items into GenericRecords. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file, and the reader's schema provided to the GenericDatumReader. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next, we use the DataFileReader to iterate through the serialized users and print the deserialized object to stdout. Note how we perform the iteration: we create a single GenericRecord object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same record object rather than allocating a new GenericRecord for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (GenericRecord user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile +$ mvn -q exec:java -Dexec.mainClass=example.GenericMain +``` diff --git a/doc/content/en/docs/1.12.0/Getting started (Python)/_index.md b/doc/content/en/docs/1.12.0/Getting started (Python)/_index.md new file mode 100644 index 00000000000..8ec8f6d4e82 --- /dev/null +++ b/doc/content/en/docs/1.12.0/Getting started (Python)/_index.md @@ -0,0 +1,147 @@ +--- +categories: [] +tags: ["python"] +title: "Getting Started (Python)" +linkTitle: "Getting Started (Python)" +weight: 3 +--- + + + +This is a short guide for getting started with Apache Avroâ„ĸ using Python. This guide only covers using Avro for data serialization; see Patrick Hunt's Avro RPC Quick Start for a good introduction to using Avro for RPC. + +## Notice for Python 3 users +A package called "avro-python3" had been provided to support Python 3 previously, but the codebase was consolidated into the "avro" package and that supports both Python 2 and 3 now. The avro-python3 package will be removed in the near future, so users should use the "avro" package instead. They are mostly API compatible, but there's a few minor difference (e.g., function name capitalization, such as avro.schema.Parse vs avro.schema.parse). + +## Download +For Python, the easiest way to get started is to install it from PyPI. Python's Avro API is available over PyPi. + +```shell +$ python3 -m pip install avro +``` + +The official releases of the Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the Apache Avroâ„ĸ Releases page. This guide uses Avro 1.12.0, the latest version at the time of writing. Download and unzip avro-1.12.0.tar.gz, and install via python setup.py (this will probably require root privileges). Ensure that you can import avro from a Python prompt. + +```shell +$ tar xvf avro-1.12.0.tar.gz +$ cd avro-1.12.0 +$ python setup.py install +$ python +>>> import avro # should not raise ImportError +``` + +Alternatively, you may build the Avro Python library from source. From your the root Avro directory, run the commands + +```shell +$ cd lang/py/ +$ python3 -m pip install -e . +$ python +``` + +## Defining a schema +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item, regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. Note that the Avro Python library does not support code generation. + +Try running the following code snippet, which serializes two users to a data file on disk, and then reads back and deserializes the data file: + +```python +import avro.schema +from avro.datafile import DataFileReader, DataFileWriter +from avro.io import DatumReader, DatumWriter + +schema = avro.schema.parse(open("user.avsc", "rb").read()) + +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +writer.close() + +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +for user in reader: + print(user) +reader.close() +``` + +This outputs: + +```json +{'favorite_color': None, 'favorite_number': 256, 'name': 'Alyssa'} +{'favorite_color': 'red', 'favorite_number': 7, 'name': 'Ben'} +``` + +Do make sure that you open your files in binary mode (i.e. using the modes wb or rb respectively). Otherwise you might generate corrupt files due to automatic replacement of newline characters with the platform-specific representations. + +Let's take a closer look at what's going on here. + +```python +schema = avro.schema.parse(open("user.avsc", "rb").read()) +``` + +avro.schema.parse takes a string containing a JSON schema definition as input and outputs a avro.schema.Schema object (specifically a subclass of Schema, in this case RecordSchema). We're passing in the contents of our user.avsc schema file here. + +```python +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +``` + +We create a DataFileWriter, which we'll use to write serialized items to a data file on disk. The DataFileWriter constructor takes three arguments: + +* The file we'll serialize to +* A DatumWriter, which is responsible for actually serializing the items to Avro's binary format (DatumWriters can be used separately from DataFileWriters, e.g., to perform IPC with Avro). +* The schema we're using. The DataFileWriter needs the schema both to write the schema to the data file, and to verify that the items we write are valid items and write the appropriate fields. + +```python +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +``` + +We use DataFileWriter.append to add items to our data file. Avro records are represented as Python dicts. Since the field favorite_color has type ["string", "null"], we are not required to specify this field, as shown in the first append. Were we to omit the required name field, an exception would be raised. Any extra entries not corresponding to a field are present in the dict are ignored. + +```python +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +``` + +We open the file again, this time for reading back from disk. We use a DataFileReader and DatumReader analagous to the DataFileWriter and DatumWriter above. + +```python +for user in reader: + print(user) +``` + +The DataFileReader is an iterator that returns dicts corresponding to the serialized items. diff --git a/doc/content/en/docs/1.12.0/IDL Language/_index.md b/doc/content/en/docs/1.12.0/IDL Language/_index.md new file mode 100644 index 00000000000..68c5605b7b3 --- /dev/null +++ b/doc/content/en/docs/1.12.0/IDL Language/_index.md @@ -0,0 +1,511 @@ +--- +title: "IDL Language" +linkTitle: "IDL Language" +weight: 201 +--- + + + +## Introduction +This document defines Avro IDL, a higher-level language for authoring Avro schemata. Before reading this document, you should have familiarity with the concepts of schemata and protocols, as well as the various primitive and complex types available in Avro. + +## Overview + +### Purpose +The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python. Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. + +### Usage +Each Avro IDL file defines either a single Avro Protocol, or an Avro Schema with supporting named schemata in a namespace. When parsed, it thus yields either a Protocol or a Schema. These can be respectively written to JSON-format Avro Protocol files with extension .avpr or JSON-format Avro Schema files with extension .avsc. + +To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr +$ head /tmp/namespaces.avpr +{ + "protocol" : "TestNamespace", + "namespace" : "avro.test.protocol", +``` +To convert a _.avdl_ file into a _.avsc_ file, it may be processed by the `idl` tool too. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/schema_syntax_schema.avdl /tmp/schema_syntax.avsc +$ head /tmp/schema_syntax.avsc +{ + "type": "array", + "items": { + "type": "record", + "name": "StatusUpdate", +``` +The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information. + +A Maven plugin is also provided to compile .avdl files. To use it, add something like the following to your pom.xml: +```xml + + + + org.apache.avro + avro-maven-plugin + + + + idl + + + + + + +``` + +## Defining a Schema in Avro IDL +An Avro IDL file consists of exactly one (main) schema definition. The minimal schema is defined by the following code: +```java +schema int; +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type": "int" +} +``` +More complex schemata can also be defined, for example by adding named schemata like this: +```java +namespace default.namespace.for.named.schemata; +schema Message; + +record Message { + string? title = null; + string message; +} +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type" : "record", + "name" : "Message", + "namespace" : "default.namespace.for.named.schemata", + "fields" : [ { + "name" : "title", + "type" : [ "null", "string" ], + "default": null + }, { + "name" : "message", + "type" : "string" + } ] +} +``` +Schemata in Avro IDL can contain the following items: + +* Imports of external protocol and schema files (only named schemata are imported). +* Definitions of named schemata, including records, errors, enums, and fixeds. + +## Defining a Protocol in Avro IDL +An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined by the following code: +```java +protocol MyProtocol { +} +``` +This is equivalent to (and generates) the following JSON protocol definition: +```json +{ +"protocol" : "MyProtocol", + "types" : [ ], + "messages" : { + } +} +``` +The namespace of the protocol may be changed using the @namespace annotation: +```java +@namespace("mynamespace") +protocol MyProtocol { +} +``` +This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, as will be described later in this document. + +Protocols in Avro IDL can contain the following items: + +* Imports of external protocol and schema files. +* Definitions of named schemata, including records, errors, enums, and fixeds. +* Definitions of RPC messages + +## Imports +Files may be imported in one of three formats: + +* An IDL file may be imported with a statement like: + + `import idl "foo.avdl";` + +* A JSON protocol file may be imported with a statement like: + + `import protocol "foo.avpr";` + +* A JSON schema file may be imported with a statement like: + + `import schema "foo.avsc";` + +When importing into an IDL schema file, only (named) types are imported into this file. When importing into an IDL protocol, messages are imported into the protocol as well. + +Imported file names are resolved relative to the current IDL file. + +## Defining an Enumeration +Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. This default is only used when an incompatible symbol is read. It is not used if the enum field is missing. + +Example Writer Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE, OVAL +} +``` +Example Reader Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE +} = CIRCLE; +``` +In the above example, the reader will use the default value of `CIRCLE` whenever reading data written with the `OVAL` symbol of the writer. Also note that, unlike the JSON format, anonymous enums cannot be defined. + +## Defining a Fixed Length Field +Fixed fields are defined using the following syntax: +``` +fixed MD5(16); +``` +This example defines a fixed-length type called MD5, which contains 16 bytes. + +## Defining Records and Errors +Records are defined in Avro IDL using a syntax similar to a struct definition in C: +```java +record Employee { + string name; + boolean active = true; + long salary; +} +``` +The above example defines a record with the name “Employee” with three fields. + +To define an error, simply use the keyword _error_ instead of _record_. For example: +```java +error Kaboom { + string explanation; + int result_code = -1; +} +``` +Each field in a record or error consists of a type and a name, optional property annotations and an optional default value. + +A type reference in Avro IDL must be one of: + +* A primitive type +* A logical type +* A named schema (either defined or imported) +* A complex type (array, map, or union) + +### Primitive Types +The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_. + +### Logical Types +Some of the logical types supported by Avro's JSON format are directly supported by Avro IDL. The currently supported types are: + +* _decimal_ (logical type [decimal]({{< relref "../specification#decimal" >}})) +* _date_ (logical type [date]({{< relref "../specification#date" >}})) +* _time_ms_ (logical type [time-millis]({{< relref "../specification#time-millisecond-precision" >}})) +* _timestamp_ms_ (logical type [timestamp-millis]({{< relref "../specification#timestamp-millisecond-precision" >}})) +* _local_timestamp_ms_ (logical type [local-timestamp-millis]({{< relref "../specification#local_timestamp_ms" >}})) +* _uuid_ (logical type [uuid]({{< relref "../specification#uuid" >}})) + +For example: +```java +record Job { + string jobid; + date submitDate; + time_ms submitTime; + timestamp_ms finishTime; + decimal(9,2) finishRatio; + uuid pk = "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"; +} +``` + +Logical types can also be specified via an annotation, which is useful for logical types for which a keyword does not exist: + +```java +record Job { + string jobid; + @logicalType("timestamp-micros") + long finishTime; +} +``` + +### References to Named Schemata +If a named schema has already been defined in the same Avro IDL file, it may be referenced by name as if it were a primitive type: +```java +record Card { + Suit suit; // refers to the enum Card defined above + int number; +} +``` + +### Default Values +Default values for fields may be optionally specified by using an equals sign after the field name followed by a JSON expression indicating the default value. This JSON is interpreted as described in the [spec]({{< relref "../specification#schema-record" >}}). + +### Complex Types + +#### Arrays +Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of any type t is denoted `array`. For example, an array of strings is denoted `array`, and a multidimensional array of Foo records would be `array>`. + +#### Maps +Map types are written similarly to array types. An array that contains values of type t is written `map`. As in the JSON schema format, all maps contain `string`-type keys. + +#### Unions +Union types are denoted as `union { typeA, typeB, typeC, ... }`. For example, this record contains a string field that is optional (unioned with null), and a field containing either a precise or a imprecise number: +```java +record RecordWithUnion { + union { null, string } optionalString; + union { decimal(12, 6), float } number; +} +``` +Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a union may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. + +Because it occurs so often, there is a special shorthand to denote a union of `null` with one other schema. The first three fields in the following snippet have identical schemata, as do the last two fields: + +```java +record RecordWithUnion { + union { null, string } optionalString1 = null; + string? optionalString2 = null; + string? optionalString3; // No default value + + union { string, null } optionalString4 = "something"; + string? optionalString5 = "something else"; +} +``` + +Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So all fields are valid in the example above. + +## Defining RPC Messages +The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message _add_ which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: +```java +int add(int foo, int bar = 0); +``` +Message arguments, like record fields, may specify default values. + +To define a message with no response, you may use the alias _void_, equivalent to the Avro _null_ type: +```java +void logMessage(string message); +``` +If you have defined or imported an error type within the same protocol, you may declare that a message can throw this error using the syntax: +```java +void goKaboom() throws Kaboom; +``` +To define a one-way message, use the keyword `oneway` after the parameter list, for example: +```java +void fireAndForget(string message) oneway; +``` + +## Other Language Features + +### Comments and documentation +All Java-style comments are supported within a Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines. + +Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment. + +### Escaping Identifiers +Occasionally, one may want to distinguish between identifiers and languages keywords. In order to do so, backticks (`) may be used to escape +the identifier. For example, to define a message with the literal name error, you may write: +```java +void `error`(); +``` +This syntax is allowed anywhere an identifier is expected. + +### Annotations for Ordering and Namespaces +Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. These can be custom properties, or +special properties as used in the JSON-format Avro Schema and Protocol files. + +For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows: +```java +record MyRecord { + string @order("ascending") myAscendingSortField; + string @order("descending") myDescendingField; + string @order("ignore") myIgnoredField; +} +``` +A field's type (with the exception of type references) may also be preceded by annotations, e.g.: +```java +record MyRecord { + @java-class("java.util.ArrayList") array myStrings; +} +``` +This can be used to support java classes that can be serialized/deserialized via their `toString`/`String constructor`, e.g.: +```java +record MyRecord { + @java-class("java.math.BigDecimal") string value; + @java-key-class("java.io.File") map fileStates; + array<@java-class("java.math.BigDecimal") string> weights; +} +``` +Similarly, a `@namespace` annotation may be used to modify the namespace when defining a named schema. For example: +```java +@namespace("org.apache.avro.firstNamespace") +protocol MyProto { + @namespace("org.apache.avro.someOtherNamespace") + record Foo {} + + record Bar {} +} +``` +will define a protocol in the _firstNamespace_ namespace. The record _Foo_ will be defined in _someOtherNamespace_ and _Bar_ will be defined in _firstNamespace_ as it inherits its default from its container. + +Type and field aliases are specified with the `@aliases` annotation as follows: +```java +@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"]) +record MyRecord { + string @aliases(["oldField", "ancientField"]) myNewField; +} +``` +Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. You can use any identifier or series of identifiers separated by dots and/or dashes as property name. + +## Complete Example +The following is an example of two Avro IDL files that together show most of the above features: + +### schema.avdl +```java +/* + * Header with license information. + */ +// Optional default namespace (if absent, the default namespace is the null namespace). +namespace org.apache.avro.test; +// Optional main schema definition; if used, the IDL file is equivalent to a .avsc file. +schema TestRecord; + +/** Documentation for the enum type Kind */ +@aliases(["org.foo.KindOf"]) +enum Kind { + FOO, + BAR, // the bar enum value + BAZ +} = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. + +/** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ +fixed MD5(16); + +record TestRecord { + /** Record name; has no intrinsic order */ + string @order("ignore") name; + + Kind @order("descending") kind; + + MD5 hash; + + /* + Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. + */ + union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + // Shorthand syntax; the null in this union is placed based on the default value (or first is there's no default). + MD5? anotherNullableHash = null; + + array arrayOfLongs; +} +``` + +### protocol.avdl +```java +/* + * Header with license information. + */ + +/** + * An example protocol in Avro IDL + */ +@namespace("org.apache.avro.test") +protocol Simple { + // Import the example file above + import idl "schema.avdl"; + + /** Errors are records that can be thrown from a method */ + error TestError { + string message; + } + + string hello(string greeting); + /** Return what was given. Demonstrates the use of backticks to name types/fields/messages/parameters after keywords */ + TestRecord echo(TestRecord `record`); + int add(int arg1, int arg2); + bytes echoBytes(bytes data); + void `error`() throws TestError; + // The oneway keyword forces the method to return null. + void ping() oneway; +} +``` + +Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory. + +## IDE support + +There are several editors and IDEs that support Avro IDL files, usually via plugins. + +### JetBrains + +Apache Avro IDL Schema Support 203.1.2 was released in 9 December 2021. + +Features: +* Syntax Highlighting +* Code Completion +* Code Formatting +* Error Highlighting +* Inspections & quick fixes +* JSON schemas for .avpr and .avsc files + +It's available via the [JetBrains Marketplace](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support) +and on [GitHub](https://github.com/opwvhk/avro-schema-support). + +The plugin supports almost the all JetBrains products: IntelliJ IDEA, PyCharm, WebStorm, Android Studio, AppCode, GoLand, Rider, CLion, RubyMine, PhpStorm, DataGrip, DataSpell, MPS, Code With Me Guest and JetBrains Client. + +Only JetBrains Gateway does not support this plugin directly. But the backend (JetBrains) IDE that it connects to does. + +### Eclipse + +Avroclipse 0.0.11 was released on 4 December 2019. + +Features: +* Syntax Highlighting +* Error Highlighting +* Code Completion + +It is available on the [Eclipse Marketplace](https://marketplace.eclipse.org/content/avroclipse) +and [GitHub](https://github.com/dvdkruk/avroclipse). + +### Visual Studio Code + +avro-idl 0.5.0 was released on 16 June 2021. It provides syntax highlighting. + +It is available on the [VisualStudio Marketplace](https://marketplace.visualstudio.com/items?itemName=streetsidesoftware.avro) +and [GitHub](https://github.com/Jason3S/vscode-avro-ext) + +### Atom.io + +atom-language-avro 0.0.13 was released on 14 August 2015. It provides syntax highlighting. + +It is available as [Atom.io package](https://atom.io/packages/atom-language-avro) +and [GitHub](https://github.com/jonesetc/atom-language-avro) + +### Vim + +A `.avdl` detecting plugin by Gurpreet Atwal on [GitHub](https://github.com/gurpreetatwal/vim-avro) (Last change in December 2016) + +[avro-idl.vim](https://github.com/apache/avro/blob/main/share/editors/avro-idl.vim) in the Avro repository `share/editors` directory (last change in September 2010) + +Both provide syntax highlighting. diff --git a/doc/content/en/docs/1.12.0/MapReduce guide/_index.md b/doc/content/en/docs/1.12.0/MapReduce guide/_index.md new file mode 100644 index 00000000000..fdae67a78c6 --- /dev/null +++ b/doc/content/en/docs/1.12.0/MapReduce guide/_index.md @@ -0,0 +1,396 @@ +--- +title: "MapReduce guide" +linkTitle: "MapReduce guide" +weight: 200 +--- + + + +Avro provides a convenient way to represent complex data structures within a Hadoop MapReduce job. Avro data can be used as both input to and output from a MapReduce job, as well as the intermediate format. The example in this guide uses Avro data for all three, but it's possible to mix and match; for instance, MapReduce can be used to aggregate a particular field in an Avro record. + +This guide assumes basic familiarity with both Hadoop MapReduce and Avro. See the [Hadoop documentation](https://hadoop.apache.org/docs/current/) and the [Avro getting started guide](./getting-started-java/) for introductions to these projects. This guide uses the old MapReduce API (`org.apache.hadoop.mapred`) and the new MapReduce API (`org.apache.hadoop.mapreduce`). + +## Setup +The code from this guide is included in the Avro docs under examples/mr-example. The example is set up as a Maven project that includes the necessary Avro and MapReduce dependencies and the Avro Maven plugin for code generation, so no external jars are needed to run the example. In particular, the POM includes the following dependencies: +```xml + + org.apache.avro + avro + 1.12.0 + + + org.apache.avro + avro-mapred + 1.12.0 + + + org.apache.hadoop + hadoop-client + 3.1.2 + +``` +And the following plugin: +```xml + + org.apache.avro + avro-maven-plugin + 1.12.0 + + + generate-sources + + schema + + + ${project.basedir}/../ + ${project.basedir}/target/generated-sources/ + + + + +``` + +If you do not configure the *sourceDirectory* and *outputDirectory* properties, the defaults will be used. The *sourceDirectory* property defaults to *src/main/avro*. The *outputDirectory* property defaults to *target/generated-sources*. You can change the paths to match your project layout. + +Alternatively, Avro jars can be downloaded directly from the Apache Avroâ„ĸ Releases [page](https://avro.apache.org/releases.html). The relevant Avro jars for this guide are *avro-1.12.0.jar* and *avro-mapred-1.12.0.jar*, as well as *avro-tools-1.12.0.jar* for code generation and viewing Avro data files as JSON. In addition, you will need to install Hadoop in order to use MapReduce. + +## Example: ColorCount +Below is a simple example of a MapReduce that uses Avro. There is an example for both the old (org.apache.hadoop.mapred) and new (org.apache.hadoop.mapreduce) APIs under *examples/mr-example/src/main/java/example/*. _MapredColorCount_ is the example for the older mapred API while _MapReduceColorCount_ is the example for the newer mapreduce API. Both examples are below, but we will detail the mapred API in our subsequent examples. + +MapredColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.*; +import org.apache.avro.Schema.Type; +import org.apache.avro.mapred.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.*; + +import example.avro.User; + +public class MapredColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } + } + + public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapredColorCount "); + return -1; + } + + JobConf conf = new JobConf(getConf(), MapredColorCount.class); + conf.setJobName("colorcount"); + + FileInputFormat.setInputPaths(conf, new Path(args[0])); + FileOutputFormat.setOutputPath(conf, new Path(args[1])); + + AvroJob.setMapperClass(conf, ColorCountMapper.class); + AvroJob.setReducerClass(conf, ColorCountReducer.class); + + // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set + // relevant config options such as input/output format, map output + // classes, and output key class. + AvroJob.setInputSchema(conf, User.getClassSchema()); + AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); + + JobClient.runJob(conf); + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args); + System.exit(res); + } +} +``` + +MapReduceColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.avro.mapreduce.AvroJob; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.avro.mapreduce.AvroKeyValueOutputFormat; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import example.avro.User; + +public class MapReduceColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } + + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapReduceColorCount "); + return -1; + } + + Job job = new Job(getConf()); + job.setJarByClass(MapReduceColorCount.class); + job.setJobName("Color Count"); + + FileInputFormat.setInputPaths(job, new Path(args[0])); + FileOutputFormat.setOutputPath(job, new Path(args[1])); + + job.setInputFormatClass(AvroKeyInputFormat.class); + job.setMapperClass(ColorCountMapper.class); + AvroJob.setInputKeySchema(job, User.getClassSchema()); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + job.setOutputFormatClass(AvroKeyValueOutputFormat.class); + job.setReducerClass(ColorCountReducer.class); + AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); + AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); + + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new MapReduceColorCount(), args); + System.exit(res); + } +} +``` +ColorCount reads in data files containing *User* records, defined in _examples/user.avsc_, and counts the number of instances of each favorite color. (This example draws inspiration from the canonical _WordCount_ MapReduce application.) This example uses the old MapReduce API. See MapReduceAvroWordCount, found under _doc/examples/mr-example/src/main/java/example/_ to see the new MapReduce API example. The User schema is defined as follows: +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` +This schema is compiled into the *User* class used by *ColorCount* via the Avro Maven plugin (see _examples/mr-example/pom.xml_ for how this is set up). + +*ColorCountMapper* essentially takes a *User* as input and extracts the User's favorite color, emitting the key-value pair ``. _ColorCountReducer_ then adds up how many occurrences of a particular favorite color were emitted, and outputs the result as a Pair record. These Pairs are serialized to an Avro data file. + +## Running ColorCount +The _ColorCount_ application is provided as a Maven project in the Avro docs under _examples/mr-example_. To build the project, including the code generation of the User schema, run: +```shell +mvn compile +``` +Next, run _GenerateData_ from `examples/mr-examples` to create an Avro data file, `input/users.avro`, containing 20 Users with favorite colors chosen randomly from a list: +```shell +mvn exec:java -q -Dexec.mainClass=example.GenerateData +``` +Besides creating the data file, GenerateData prints the JSON representations of the Users generated to stdout, for example: +```json +{"name": "user", "favorite_number": null, "favorite_color": "red"} +{"name": "user", "favorite_number": null, "favorite_color": "green"} +{"name": "user", "favorite_number": null, "favorite_color": "purple"} +{"name": "user", "favorite_number": null, "favorite_color": null} +... +``` +Now we're ready to run ColorCount. We specify our freshly-generated input folder as the input path and output as our output folder (note that MapReduce will not start a job if the output folder already exists): +```shell +mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output" +``` +Once ColorCount completes, checking the contents of the new output directory should yield the following: +```shell +$ ls output/ +part-00000.avro _SUCCESS +``` +You can check the contents of the generated Avro file using the avro-tools jar: +```shell +$ java -jar /path/to/avro-tools-1.12.0.jar tojson output/part-00000.avro +{"value": 3, "key": "blue"} +{"value": 7, "key": "green"} +{"value": 1, "key": "none"} +{"value": 2, "key": "orange"} +{"value": 3, "key": "purple"} +{"value": 2, "key": "red"} +{"value": 2, "key": "yellow"} +``` +Now let's go over the ColorCount example in detail. + +## AvroMapper - org.apache.hadoop.mapred API + +The easiest way to use Avro data files as input to a MapReduce job is to subclass `AvroMapper`. An `AvroMapper` defines a `map` function that takes an Avro datum as input and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountMapper is an AvroMapper that takes a User as input and outputs a `Pair>`, where the CharSequence key is the user's favorite color and the Integer value is 1. +```java +public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } +} +``` +In order to use our AvroMapper, we must call AvroJob.setMapperClass and AvroJob.setInputSchema. +```java +AvroJob.setMapperClass(conf, ColorCountMapper.class); +AvroJob.setInputSchema(conf, User.getClassSchema()); +``` +Note that `AvroMapper` does not implement the `Mapper` interface. Under the hood, the specified Avro data files are deserialized into AvroWrappers containing the actual data, which are processed by a Mapper that calls the configured AvroMapper's map function. AvroJob.setInputSchema sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setMapperClass`, `JobConf.setInputFormat`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Mapper - org.apache.hadoop.mapreduce API +This document will not go into all the differences between the mapred and mapreduce APIs, however will describe the main differences. As you can see, ColorCountMapper is now a subclass of the Hadoop Mapper class and is passed an AvroKey as it's key. Additionally, the AvroJob method calls were slightly changed. +```java + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } +``` + +## AvroReducer - org.apache.hadoop.mapred API +Analogously to AvroMapper, an AvroReducer defines a reducer function that takes the key/value types output by an AvroMapper (or any mapper that outputs Pairs) and outputs a key/value pair represented a Pair record. In the ColorCount example, ColorCountReducer is an AvroReducer that takes the CharSequence key representing a favorite color and the `Iterable` representing the counts for that color (they should all be 1 in this example) and adds up the counts. +```java +public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } +} +``` +In order to use our AvroReducer, we must call AvroJob.setReducerClass and AvroJob.setOutputSchema. +```java +AvroJob.setReducerClass(conf, ColorCountReducer.class); +AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); +``` +Note that _AvroReducer_ does not implement the _Reducer_ interface. The intermediate Pairs output by the mapper are split into _AvroKeys_ and _AvroValues_, which are processed by a Reducer that calls the configured AvroReducer's `reduce` function. `AvroJob.setOutputSchema` sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setReducerClass`, `JobConf.setOutputFormat`, `JobConf.setOutputKeyClass`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Reduce - org.apache.hadoop.mapreduce API +As before we not detail every difference between the APIs. As with the _Mapper_ change _ColorCountReducer_ is now a subclass of _Reducer_ and _AvroKey_ and _AvroValue_ are emitted. Additionally, the _AvroJob_ method calls were slightly changed. +```java + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } +``` + +## Learning more +The mapred API allows users to mix Avro AvroMappers and AvroReducers with non-Avro Mappers and Reducers and the mapreduce API allows users input Avro and output non-Avro or vice versa. + +The mapred package has API org.apache.avro.mapred documentation as does the `org.apache.avro.mapreduce` package. MapReduce API (`org.apache.hadoop.mapreduce`). Similarily to the mapreduce package, it's possible with the mapred API to implement your own Mappers and Reducers directly using the public classes provided in these libraries. See the `AvroWordCount` application, found under _examples/mr-example/src/main/java/example/AvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the old MapReduce API. See the `MapReduceAvroWordCount` application, found under _examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the new MapReduce API. diff --git a/doc/content/en/docs/1.12.0/SASL profile/_index.md b/doc/content/en/docs/1.12.0/SASL profile/_index.md new file mode 100644 index 00000000000..a938310414d --- /dev/null +++ b/doc/content/en/docs/1.12.0/SASL profile/_index.md @@ -0,0 +1,93 @@ +--- +title: "SASL profile" +linkTitle: "SASL profile" +weight: 202 +--- + + + +## Introduction +SASL ([RFC 2222](https://www.ietf.org/rfc/rfc2222.txt)) provides a framework for authentication and security of network protocols. Each protocol that uses SASL is meant to define a SASL profile. This document provides a SASL profile for connection-based Avro RPC. + +## Overview +SASL negotiation proceeds as a series of message interactions over a connection between a client and server using a selected SASL mechanism. The client starts this negotiation by sending its chosen mechanism name with an initial (possibly empty) message. Negotiation proceeds with the exchange of messages until either side indicates success or failure. The content of the messages is mechanism-specific. If the negotiation succeeds, then the session can proceed over the connection, otherwise it must be abandoned. + +Some mechanisms continue to process session data after negotiation (e.g., encrypting it), while some specify that further session data is transmitted unmodified. + +## Negotiation + +### Commands +Avro SASL negotiation uses four one-byte commands. + +* 0: START Used in a client's initial message. +* 1: CONTINUE Used while negotiation is ongoing. +* 2: FAIL Terminates negotiation unsuccessfully. +* 3: COMPLETE Terminates negotiation successfully. + +The format of a START message is: + +`| 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |` + +The format of a CONTINUE message is: + +`| 1 | 4-byte payload length | payload data |` + +The format of a FAIL message is: + +`| 2 | 4-byte message length | UTF-8 message |` + +The format of a COMPLETE message is: + +`| 3 | 4-byte payload length | payload data |` + +### Process +Negotiation is initiated by a client sending a START command containing the client's chosen mechanism name and any mechanism-specific payload data. + +The server and client then interchange some number (possibly zero) of CONTINUE messages. Each message contains payload data that is processed by the security mechanism to generate the next message. + +Once either the client or server send a FAIL message then negotiation has failed. UTF-8-encoded text is included in the failure message. Once either a FAIL message has been sent or received, or any other error occurs in the negotiation, further communication on this connection must cease. + +Once either the client or server send a COMPLETE message then negotiation has completed successfully. Session data may now be transmitted over the connection until it is closed by either side. + +## Session Data +If no SASL QOP (quality of protection) is negotiated, then all subsequent writes to/reads over this connection are written/read unmodified. In particular, messages use Avro [framing](#Message+Framing), and are of the form: + +`| 4-byte frame length | frame data | ... | 4 zero bytes |` + +If a SASL QOP is negotiated, then it must be used by the connection for all subsequent messages. This is done by wrapping each non-empty frame written using the security mechanism and unwrapping each non-empty frame read. The length written in each non-empty frame is the length of the wrapped data. Complete frames must be passed to the security mechanism for unwrapping. Unwrapped data is then passed to the application as the content of the frame. + +If at any point processing fails due to wrapping, unwrapping or framing errors, then all further communication on this connection must cease. + +## Anonymous Mechanism +The SASL anonymous mechanism ([RFC 2245](https://www.ietf.org/rfc/rfc2222.txt)) is quite simple to implement. In particular, an initial anonymous request may be prefixed by the following static sequence: + +`| 0 | 0009 | ANONYMOUS | 0000 |` + +If a server uses the anonymous mechanism, it should check that the mechanism name in the start message prefixing the first request received is 'ANONYMOUS', then simply prefix its initial response with a COMPLETE message of: + +`| 3 | 0000 |` + +If an anonymous server recieves some other mechanism name, then it may respond with a FAIL message as simple as: + +`| 2 | 0000 |` + +Note that the anonymous mechanism need add no additional round-trip messages between client and server. The START message can be piggybacked on the initial request and the COMPLETE or FAIL message can be piggybacked on the initial response. diff --git a/doc/content/en/docs/1.12.0/Specification/_index.md b/doc/content/en/docs/1.12.0/Specification/_index.md new file mode 100644 index 00000000000..75eda7b7f62 --- /dev/null +++ b/doc/content/en/docs/1.12.0/Specification/_index.md @@ -0,0 +1,896 @@ +--- +title: "Specification" +linkTitle: "Specification" +weight: 4 +date: 2021-10-25 +aliases: +- spec.html +--- + + + +## Introduction +This document defines Apache Avro. It is intended to be the authoritative specification. Implementations of Avro must adhere to this document. + +## Schema Declaration {#schema-declaration} +A Schema is represented in [JSON](https://www.json.org/) by one of: + +* A JSON string, naming a defined type. +* A JSON object, of the form: +```js +{"type": "typeName", ...attributes...} +``` +where _typeName_ is either a primitive or derived type name, as defined below. Attributes not defined in this document are permitted as metadata, but must not affect the format of serialized data. +* A JSON array, representing a union of embedded types. + +## Primitive Types +The set of primitive type names is: + +* _null_: no value +* _boolean_: a binary value +* _int_: 32-bit signed integer +* _long_: 64-bit signed integer +* _float_: single precision (32-bit) IEEE 754 floating-point number +* _double_: double precision (64-bit) IEEE 754 floating-point number +* _bytes_: sequence of 8-bit unsigned bytes +* _string_: unicode character sequence + +Primitive types have no specified attributes. + +Primitive type names are also defined type names. Thus, for example, the schema "string" is equivalent to: +```json +{"type": "string"} +``` + +## Complex Types +Avro supports six kinds of complex types: _records_, _enums_, _arrays_, _maps_, _unions_ and _fixed_. + +### Records {#schema-record} +Records use the type name "record" and support the following attributes: + +* _name_: a JSON string providing the name of the record (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _aliases_: a JSON array of strings, providing alternate names for this record (optional). +* _fields_: a JSON array, listing fields (required). Each field is a JSON object with the following attributes: + * _name_: a JSON string providing the name of the field (required), and + * _doc_: a JSON string describing this field for users (optional). + * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above + * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. + * _aliases_: a JSON array of strings, providing alternate names for this field (optional). + * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema that matches in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. + +*field default values* + +| **avro type** | **json type** | **example** | +|---------------|----------------|-------------| +| null | null | `null` | +| boolean | boolean | `true` | +| int,long | integer | `1` | +| float,double | number | `1.1` | +| bytes | string | `"\u00FF"` | +| string | string | `"foo"` | +| record | object | `{"a": 1}` | +| enum | string | `"FOO"` | +| array | array | `[1]` | +| map | object | `{"a": 1}` | +| fixed | string | `"\u00ff"` | + +For example, a linked-list of 64-bit values may be defined with: +```jsonc +{ + "type": "record", + "name": "LongList", + "aliases": ["LinkedLongs"], // old name for this + "fields" : [ + {"name": "value", "type": "long"}, // each element has a long + {"name": "next", "type": ["null", "LongList"]} // optional next element + ] +} +``` + +### Enums +Enums use the type name "enum" and support the following attributes: + +* _name_: a JSON string providing the name of the enum (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _symbols_: a JSON array, listing symbols, as JSON strings (required). All symbols in an enum must be unique; duplicates are prohibited. Every symbol must match the regular expression [A-Za-z_][A-Za-z0-9_]* (the same requirement as for [names]({{< ref "#names" >}} "Names")). +* _default_: A default value for this enumeration, used during resolution when the reader encounters a symbol from the writer that isn't defined in the reader's schema (optional). The value provided here must be a JSON string that's a member of the symbols array. See documentation on schema resolution for how this gets used. + +For example, playing card suits might be defined with: +```json +{ + "type": "enum", + "name": "Suit", + "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"] +} +``` + +### Arrays +Arrays use the type name "array" and support a single attribute: + +* _items_: the schema of the array's items. + +For example, an array of strings is declared with: +```json +{ + "type": "array", + "items" : "string", + "default": [] +} +``` + +### Maps +Maps use the type name "map" and support one attribute: + +* _values_: the schema of the map's values. + +Map keys are assumed to be strings. + +For example, a map from string to long is declared with: +```json +{ + "type": "map", + "values" : "long", + "default": {} +} +``` + +### Unions +Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string. + +(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match with one element of the union. + +Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.) + +Unions may not immediately contain other unions. + +### Fixed +Fixed uses the type name "fixed" and supports the following attributes: + +* _name_: a string naming this fixed (required). +* _namespace_, a string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _size_: an integer, specifying the number of bytes per value (required). + +For example, 16-byte quantity may be declared with: +```json +{"type": "fixed", "size": 16, "name": "md5"} +``` + +### Names +Record, enums and fixed are named types. Each has a fullname that is composed of two parts: a name and a namespace, separated by a dot. Equality of names is defined on the fullname – it is an error to specify two different types with the same name. + +Record fields and enum symbols have names as well (but no namespace). Equality of field names and enum symbols is defined within their scope (the record/enum that defines them). It is an error to define multiple fields or enum symbols with the same name in a single type. Fields and enum symbols across scopes are never equal, so field names and enum symbols can be reused in a different type. + +The name portion of the fullname of named types, record field names, and enum symbols must: + +* start with [A-Za-z_] +* subsequently contain only [A-Za-z0-9_] + +A namespace is a dot-separated sequence of such names. The empty string may also be used as a namespace to indicate the null namespace. Equality of names (including field names and enum symbols) as well as fullnames is case-sensitive. + +The null namespace may not be used in a dot-separated sequence of names. So the grammar for a namespace is: +``` + | [()*] +``` + +In record, enum and fixed definitions, the fullname is determined according to the algorithm below the example: + +``` +{ + "type": "record", + "name": "Example", + "doc": "A simple name (attribute) and no namespace attribute: use the null namespace (\"\"); the fullname is 'Example'.", + "fields": [ + { + "name": "inheritNull", + "type": { + "type": "enum", + "name": "Simple", + "doc": "A simple name (attribute) and no namespace attribute: inherit the null namespace of the enclosing type 'Example'. The fullname is 'Simple'.", + "symbols": ["a", "b"] + } + }, { + "name": "explicitNamespace", + "type": { + "type": "fixed", + "name": "Simple", + "namespace": "explicit", + "doc": "A simple name (attribute) and a namespace (attribute); the fullname is 'explicit.Simple' (this is a different type than of the 'inheritNull' field).", + "size": 12 + } + }, { + "name": "fullName", + "type": { + "type": "record", + "name": "a.full.Name", + "namespace": "ignored", + "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", + "fields": [ + { + "name": "inheritNamespace", + "type": { + "type": "enum", + "name": "Understanding", + "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.", + "symbols": ["d", "e"] + } + } + ] + } + } + ] +} +``` + +The fullname of a record, enum or fixed definition is determined by the required `name` and optional `namespace` attributes like this: + +* A fullname is specified. If the name specified contains a dot, then it is assumed to be a fullname, and any namespace also specified is ignored. For example, use "name": "org.foo.X" to indicate the fullname org.foo.X. +* A simple name (a name that contains no dots) and namespace are both specified. For example, one might use "name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X. +* A simple name only is specified (a name that contains no dots). In this case the namespace is taken from the most tightly enclosing named schema or protocol, and the fullname is constructed from that namespace and the name. For example, if "name": "X" is specified, and this occurs within a field of the record definition of org.foo.Y, then the fullname is org.foo.X. This also happens if there is no enclosing namespace (i.e., the enclosing schema definition has the null namespace). + +References to previously defined names are as in the latter two cases above: if they contain a dot they are a fullname, if they do not contain a dot, the namespace is the namespace of the enclosing definition. + +Primitive type names (`null`, `boolean`, `int`, `long`, `float`, `double`, `bytes`, `string`) have no namespace and their names may not be defined in any namespace. + +Complex types (`record`, `enum`, `array`, `map`, `fixed`) have no namespace, but their names (as well as `union`) are permitted to be reused as type names. This can be confusing to the human reader, but is always unambiguous for binary serialization. Due to the limitations of JSON encoding, it is a best practice to use a namespace when using these names. + +A schema or protocol may not contain multiple definitions of a fullname. Further, a name must be defined before it is used ("before" in the depth-first, left-to-right traversal of the JSON parse tree, where the types attribute of a protocol is always deemed to come "before" the messages attribute.) + +### Aliases +Named types and fields may have aliases. An implementation may optionally use aliases to map a writer's schema to the reader's. This facilitates both schema evolution as well as processing disparate datasets. + +Aliases function by re-writing the writer's schema using aliases from the reader's schema. For example, if the writer's schema was named "Foo" and the reader's schema is named "Bar" and has an alias of "Foo", then the implementation would act as though "Foo" were named "Bar" when reading. Similarly, if data was written as a record with a field named "x" and is read as a record with a field named "y" with alias "x", then the implementation would act as though "x" were named "y" when reading. + +A type alias may be specified either as a fully namespace-qualified, or relative to the namespace of the name it is an alias for. For example, if a type named "a.b" has aliases of "c" and "x.y", then the fully qualified names of its aliases are "a.c" and "x.y". + +Aliases are alternative names, and thus subject to the same uniqueness constraints as names. Aliases should be valid names, but this is not required: any string is accepted as an alias. When aliases are used "to map a writer's schema to the reader's" (see above), this allows schema evolution to correct illegal names in old schemata. + +## Fixing an invalid, but previously accepted, schema +Over time, rules and validations on schemas have changed. It is therefore possible that a schema used to work with an older version of Avro, but now fails to parse. + +This can have several reasons, as listed below. Each reason also describes a fix, which can be applied using [schema resolution]({{< ref "#schema-resolution" >}}): you fix the problems in the schema in a way that is compatible, and then you can use the new schema to read the old data. + +### Invalid names +Invalid names of types and fields can be corrected by renaming (using an [alias]({{< ref "#aliases" >}})). This works for simple names, namespaces and fullnames. + +This fix is twofold: first, you add the invalid name as an alias to the type/field. Then, you change the name to any valid name. + +### Invalid defaults +Default values are only used to fill in missing data when reading. Invalid defaults create invalid values in these cases. The fix is to correct the default values. + + +## Data Serialization and Deserialization +Binary encoded Avro data does not include type information or field names. The benefit is that the serialized data is small, but as a result a schema must always be used in order to read Avro data correctly. The best way to ensure that the schema is structurally identical to the one used to write the data is to use the exact same schema. + +Therefore, files or systems that store Avro data should always include the writer's schema for that data. Avro-based remote procedure call (RPC) systems must also guarantee that remote recipients of data have a copy of the schema used to write that data. In general, it is advisable that any reader of Avro data should use a schema that is the same (as defined more fully in [Parsing Canonical Form for Schemas]({{< ref "#parsing-canonical-form-for-schemas" >}} "Parsing Canonical Form for Schemas")) as the schema that was used to write the data in order to deserialize it correctly. Deserializing data into a newer schema is accomplished by specifying an additional schema, the results of which are described in [Schema Resolution]({{< ref "#schema-resolution" >}}). + +In general, both serialization and deserialization proceed as a depth-first, left-to-right traversal of the schema, serializing or deserializing primitive types as they are encountered. Therefore, it is possible, though not advisable, to read Avro data with a schema that does not have the same Parsing Canonical Form as the schema with which the data was written. In order for this to work, the serialized primitive values must be compatible, in order value by value, with the items in the deserialization schema. For example, int and long are always serialized the same way, so an int could be deserialized as a long. Since the compatibility of two schemas depends on both the data and the serialization format (eg. binary is more permissive than JSON because JSON includes field names, eg. a long that is too large will overflow an int), it is simpler and more reliable to use schemas with identical Parsing Canonical Form. + +### Encodings +Avro specifies two serialization encodings: binary and JSON. Most applications will use the binary encoding, as it is smaller and faster. But, for debugging and web-based applications, the JSON encoding may sometimes be appropriate. + +### Binary Encoding {#binary-encoding} +Binary encoding does not include field names, self-contained information about the types of individual bytes, nor field or record separators. Therefore readers are wholly reliant on the schema used when the data was encoded. + +#### Primitive Types +Primitive types are encoded in binary as follows: + +* _null_ is written as zero bytes. +* a _boolean_ is written as a single byte whose value is either 0 (false) or 1 (true). +* _int_ and _long_ values are written using [variable-length](https://lucene.apache.org/java/3_5_0/fileformats.html#VInt) [zig-zag](https://code.google.com/apis/protocolbuffers/docs/encoding.html#types) coding. Some examples: + +| *value* | *hex* | +|---|---| +| 0 | 00 | +|-1 | 01 | +| 1 | 02 | +|-2 | 03 | +| 2 | 04 | +|...|...| +|-64 | 7f | +|64 | 80 01| +|...|...| + +* a _float_ is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java's [floatToRawIntBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Float.html#floatToRawIntBits-float-) and then encoded in little-endian format. +* a _double_ is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java's [doubleToRawLongBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#doubleToRawLongBits-double-) and then encoded in little-endian format. +* _bytes_ are encoded as a long followed by that many bytes of data. +* a _string_ is encoded as a long followed by that many bytes of UTF-8 encoded character data. +For example, the three-character string "foo" would be encoded as the long value 3 (encoded as hex 06) followed by the UTF-8 encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f): +``` +06 66 6f 6f +``` + +### Complex Types +Complex types are encoded in binary as follows: + +#### Records +A record is encoded by encoding the values of its fields in the order that they are declared. In other words, a record is encoded as just the concatenation of the encodings of its fields. Field values are encoded per their schema. + +For example, the record schema +```json +{ + "type": "record", + "name": "test", + "fields" : [ + {"name": "a", "type": "long"}, + {"name": "b", "type": "string"} + ] +} +``` + +An instance of this record whose a field has value 27 (encoded as hex 36) and whose b field has value "foo" (encoded as hex bytes 06 66 6f 6f), would be encoded simply as the concatenation of these, namely the hex byte sequence: +``` +36 06 66 6f 6f +``` + +#### Enums +An enum is encoded by a int, representing the zero-based position of the symbol in the schema. + +For example, consider the enum: +```json +{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] } +``` + +This would be encoded by an int between zero and three, with zero indicating "A", and 3 indicating "D". + +#### Arrays +Arrays are encoded as a series of blocks. Each block consists of a long count value, followed by that many array items. A block with count zero indicates the end of the array. Each item is encoded per the array's item schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a long block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +For example, the array schema +```json +{"type": "array", "items": "long"} +``` +an array containing the items 3 and 27 could be encoded as the long value 2 (encoded as hex 04) followed by long values 3 and 27 (encoded as hex 06 36) terminated by zero: +``` +04 06 36 00 +``` + +The blocked representation permits one to read and write arrays larger than can be buffered in memory, since one can start writing items without knowing the full length of the array. + +#### Maps {#schema-maps} +Maps are encoded as a series of _blocks_. Each block consists of a `long` _count_ value, followed by that many key/value pairs. A block with count zero indicates the end of the map. Each item is encoded per the map's value schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a `long` block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +The blocked representation permits one to read and write maps larger than can be buffered in memory, since one can start writing items without knowing the full length of the map. + +#### Unions +A union is encoded by first writing an `int` value indicating the zero-based position within the union of the schema of its value. The value is then encoded per the indicated schema within the union. + +For example, the union schema `["null","string"]` would encode: + +* _null_ as zero (the index of "null" in the union): +`00` +* the string "a" as one (the index of "string" in the union, 1, encoded as hex 02), followed by the serialized string: +`02 02 61` +NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. In reality, we don't expect unions with 215M members + +#### Fixed +Fixed instances are encoded using the number of bytes declared in the schema. + +### JSON Encoding +Except for unions, the JSON encoding is the same as is used to encode [field default values]({{< ref "#schema-record" >}}). + +The value of a union is encoded in JSON as follows: + +* if its type is _null_, then it is encoded as a JSON _null_; +* otherwise it is encoded as a JSON object with one name/value pair whose name is the type's name and whose value is the recursively encoded value. For Avro's named types (record, fixed or enum) the user-specified name is used, for other types the type name is used. + +For example, the union schema `["null","string","Foo"]`, where Foo is a record name, would encode: + +* _null_ as _null_; +* the string "a" as `{"string": "a"}` and +* a Foo instance as `{"Foo": {...}}`, where `{...}` indicates the JSON encoding of a Foo instance. + +Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc. + +### Single-object encoding +In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic. + +In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively. + +#### Single object encoding specification +Single Avro objects are encoded as follows: + +1. A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1). +1. The 8-byte little-endian CRC-64-AVRO [fingerprint]({{< ref "#schema-fingerprints" >}} "Schema fingerprints") of the object's schema. +1. The Avro object encoded using [Avro's binary encoding]({{< ref "#binary-encoding" >}}). + +Implementations use the 2-byte marker to determine whether a payload is Avro. This check helps avoid expensive lookups that resolve the schema from a fingerprint, when the message is not an encoded Avro payload. + +## Sort Order +Avro defines a standard sort order for data. This permits data written by one system to be efficiently sorted by another system. This can be an important optimization, as sort order comparisons are sometimes the most frequent per-object operation. Note also that Avro binary-encoded data can be efficiently ordered without deserializing it to objects. + +Data items may only be compared if they have identical schemas. Pairwise comparisons are implemented recursively with a depth-first, left-to-right traversal of the schema. The first mismatch encountered determines the order of the items. + +Two items with the same schema are compared according to the following rules. + +* _null_ data is always equal. +* _boolean_ data is ordered with false before true. +* _int_, _long_, _float_ and _double_ data is ordered by ascending numeric value. +* _bytes_ and fixed data are compared lexicographically by unsigned 8-bit values. +* _string_ data is compared lexicographically by Unicode code point. Note that since UTF-8 is used as the binary encoding for strings, sorting of bytes and string binary data is identical. +* _array_ data is compared lexicographically by element. +* _enum_ data is ordered by the symbol's position in the enum schema. For example, an enum whose symbols are `["z", "a"]` would sort "z" values before "a" values. +* _union_ data is first ordered by the branch within the union, and, within that, by the type of the branch. For example, an `["int", "string"]` union would order all int values before all string values, with the ints and strings themselves ordered as defined above. +* _record_ data is ordered lexicographically by field. If a field specifies that its order is: + * "ascending", then the order of its values is unaltered. + * "descending", then the order of its values is reversed. + * "ignore", then its values are ignored when sorting. +* _map_ data may not be compared. It is an error to attempt to compare data containing maps unless those maps are in an `"order":"ignore"` record field. + +## Object Container Files +Avro includes a simple object container file format. A file has a schema, and all objects stored in the file must be written according to that schema, using binary encoding. Objects are stored in blocks that may be compressed. Syncronization markers are used between blocks to permit efficient splitting of files for MapReduce processing. + +Files may include arbitrary user-specified metadata. + +A file consists of: + +* A file header, followed by +* one or more file data blocks. + +A file header consists of: + +* Four bytes, ASCII 'O', 'b', 'j', followed by 1. +* file metadata, including the schema. +* The 16-byte, randomly-generated sync marker for this file. + +File metadata is written as if defined by the following [map]({{< ref "#schema-maps" >}}) schema: +```json +{"type": "map", "values": "bytes"} +``` +All metadata properties that start with "avro." are reserved. The following file metadata properties are currently used: + +* **avro.schema** contains the schema of objects stored in the file, as JSON data (required). +* **avro.codec** the name of the compression codec used to compress blocks, as a string. Implementations are required to support the following codecs: "null" and "deflate". If codec is absent, it is assumed to be "null". The codecs are described with more detail below. + +A file header is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.Header", + "fields" : [ + {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, + {"name": "meta", "type": {"type": "map", "values": "bytes"}}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +A file data block consists of: + +* A long indicating the count of objects in this block. +* A long indicating the size in bytes of the serialized objects in the current block, after any codec is applied +* The serialized objects. If a codec is specified, this is compressed by that codec. +* The file's 16-byte sync marker. + +A file data block is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.DataBlock", + "fields" : [ + {"name": "count", "type": "long"}, + {"name": "data", "type": "bytes"}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +Each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enable detection of corrupt blocks and help ensure data integrity. + +### Required Codecs + +_null_ + +The "null" codec simply passes through data uncompressed. + +_deflate_ + +The "deflate" codec writes the data block using the deflate algorithm as specified in [RFC 1951](https://www.isi.edu/in-notes/rfc1951.txt), and typically implemented using the zlib library. Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum. + +### Optional Codecs +_bzip2_ + +The "bzip2" codec uses the [bzip2](https://sourceware.org/bzip2/) compression library. + +_snappy_ + +The "snappy" codec uses Google's [Snappy](https://code.google.com/p/snappy/) compression library. Each compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in the block. + +_xz_ + +The "xz" codec uses the [XZ](https://tukaani.org/xz/) compression library. + +_zstandard_ + +The "zstandard" codec uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. + +### Protocol Declaration +Avro protocols describe RPC interfaces. Like schemas, they are defined with JSON text. + +A protocol is a JSON object with the following attributes: + +* _protocol_, a string, the name of the protocol (required); +* _namespace_, an optional string that qualifies the name (optional); +* _doc_, an optional string describing this protocol; +* _types_, an optional list of definitions of named types (records, enums, fixed and errors). An error definition is just like a record definition except it uses "error" instead of "record". Note that forward references to named types are not permitted. +* _messages_, an optional JSON object whose keys are message names and whose values are objects whose attributes are described below. No two messages may have the same name. + +The name and namespace qualification rules defined for schema objects apply to protocols as well. + +### Messages +A message has attributes: + +* a _doc_, an optional description of the message, +* a _request_, a list of named, typed parameter schemas (this has the same form as the fields of a record declaration); +* a _response_ schema; +* an optional union of declared error schemas. The effective union has "string" prepended to the declared union, to permit transmission of undeclared "system" errors. For example, if the declared error union is `["AccessError"]`, then the effective union is `["string", "AccessError"]`. When no errors are declared, the effective error union is `["string"]`. Errors are serialized using the effective union; however, a protocol's JSON declaration contains only the declared union. +* an optional one-way boolean parameter. + +A request parameter list is processed equivalently to an anonymous record. Since record field lists may vary between reader and writer, request parameters may also differ between the caller and responder, and such differences are resolved in the same manner as record field differences. + +The one-way parameter may only be true when the response type is `"null"` and no errors are listed. + +### Sample Protocol +For example, one may define a simple HelloWorld protocol with: +```json +{ + "namespace": "com.acme", + "protocol": "HelloWorld", + "doc": "Protocol Greetings", + + "types": [ + {"name": "Greeting", "type": "record", "fields": [ + {"name": "message", "type": "string"}]}, + {"name": "Curse", "type": "error", "fields": [ + {"name": "message", "type": "string"}]} + ], + + "messages": { + "hello": { + "doc": "Say hello.", + "request": [{"name": "greeting", "type": "Greeting" }], + "response": "Greeting", + "errors": ["Curse"] + } + } +} +``` + +## Protocol Wire Format + +### Message Transport +Messages may be transmitted via different transport mechanisms. + +To the transport, a _message_ is an opaque byte sequence. + +A transport is a system that supports: + +* **transmission of request messages** +* **receipt of corresponding response messages** +Servers may send a response message back to the client corresponding to a request message. The mechanism of correspondence is transport-specific. For example, in HTTP it is implicit, since HTTP directly supports requests and responses. But a transport that multiplexes many client threads over a single socket would need to tag messages with unique identifiers. + +Transports may be either stateless or stateful. In a stateless transport, messaging assumes no established connection state, while stateful transports establish connections that may be used for multiple messages. This distinction is discussed further in the [handshake](#handshake) section below. + +#### HTTP as Transport +When [HTTP](https://www.w3.org/Protocols/rfc2616/rfc2616.html) is used as a transport, each Avro message exchange is an HTTP request/response pair. All messages of an Avro protocol should share a single URL at an HTTP server. Other protocols may also use that URL. Both normal and error Avro response messages should use the 200 (OK) response code. The chunked encoding may be used for requests and responses, but, regardless the Avro request and response are the entire content of an HTTP request and response. The HTTP Content-Type of requests and responses should be specified as "avro/binary". Requests should be made using the POST method. + +HTTP is used by Avro as a stateless transport. + +### Message Framing +Avro messages are _framed_ as a list of buffers. + +Framing is a layer between messages and the transport. It exists to optimize certain operations. + +The format of framed message data is: + +* a series of buffers, where each buffer consists of: + * a four-byte, big-endian _buffer length_, followed by + * that many bytes of _buffer_ data. +* a message is always terminated by a zero-length buffer. + +Framing is transparent to request and response message formats (described below). Any message may be presented as a single or multiple buffers. + +Framing can permit readers to more efficiently get different buffers from different sources and for writers to more efficiently store different buffers to different destinations. In particular, it can reduce the number of times large binary objects are copied. For example, if an RPC parameter consists of a megabyte of file data, that data can be copied directly to a socket from a file descriptor, and, on the other end, it could be written directly to a file descriptor, never entering user space. + +A simple, recommended, framing policy is for writers to create a new segment whenever a single binary object is written that is larger than a normal output buffer. Small objects are then appended in buffers, while larger objects are written as their own buffers. When a reader then tries to read a large object the runtime can hand it an entire buffer directly, without having to copy it. + +### Handshake +The purpose of the handshake is to ensure that the client and the server have each other's protocol definition, so that the client can correctly deserialize responses, and the server can correctly deserialize requests. Both clients and servers should maintain a cache of recently seen protocols, so that, in most cases, a handshake will be completed without extra round-trip network exchanges or the transmission of full protocol text. + +RPC requests and responses may not be processed until a handshake has been completed. With a stateless transport, all requests and responses are prefixed by handshakes. With a stateful transport, handshakes are only attached to requests and responses until a successful handshake response has been returned over a connection. After this, request and response payloads are sent without handshakes for the lifetime of that connection. + +The handshake process uses the following record schemas: +```json +{ + "type": "record", + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +{ + "type": "record", + "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", + "type": {"type": "enum", "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", + "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +``` + +* A client first prefixes each request with a `HandshakeRequest` containing just the hash of its protocol and of the server's protocol (`clientHash!=null, clientProtocol=null, serverHash!=null`), where the hashes are 128-bit MD5 hashes of the JSON protocol text. If a client has never connected to a given server, it sends its hash as a guess of the server's hash, otherwise it sends the hash that it previously obtained from this server. +The server responds with a HandshakeResponse containing one of: + * `match=BOTH, serverProtocol=null, serverHash=null` if the client sent the valid hash of the server's protocol and the server knows what protocol corresponds to the client's hash. In this case, the request is complete and the response data immediately follows the HandshakeResponse. + * `match=CLIENT, serverProtocol!=null, serverHash!=null` if the server has previously seen the client's protocol, but the client sent an incorrect hash of the server's protocol. The request is complete and the response data immediately follows the HandshakeResponse. The client must use the returned protocol to process the response and should also cache that protocol and its hash for future interactions with this server. + * `match=NONE` if the server has not previously seen the client's protocol. The serverHash and serverProtocol may also be non-null if the server's protocol hash was incorrect. +In this case the client must then re-submit its request with its protocol text (`clientHash!=null, clientProtocol!=null, serverHash!=null`) and the server should respond with a successful match (match=BOTH, serverProtocol=null, serverHash=null) as above. + +The meta field is reserved for future handshake enhancements. + +### Call Format +A _call_ consists of a request message paired with its resulting response or error message. Requests and responses contain extensible metadata, and both kinds of messages are framed as described above. + +The format of a call request is: + +* _request metadata_, a map with values of type bytes +* the _message name_, an Avro string, followed by +* the _message parameters_. Parameters are serialized according to the message's request declaration. +When the empty string is used as a message name a server should ignore the parameters and return an empty response. A client may use this to ping a server or to perform a handshake without sending a protocol message. + +When a message is declared one-way and a stateful connection has been established by a successful handshake response, no response data is sent. Otherwise the format of the call response is: + +* _response metadata_, a map with values of type bytes +* a one-byte error _flag_ boolean, followed by either: + * if the error flag is false, the message _response_, serialized per the message's response schema. + * if the error flag is true, the _error_, serialized per the message's effective error union schema. + +### Schema Resolution {#schema-resolution} +A reader of Avro data, whether from an RPC or a file, can always parse that data because the original schema must be provided along with the data. However, the reader may be programmed to read data into a different schema. For example, if the data was written with a different version of the software than it is read, then fields may have been added or removed from records. This section specifies how such schema differences should be resolved. + +We refer to the schema used to write the data as the writer's schema, and the schema that the application expects the reader's schema. Differences between these should be resolved as follows: + +* It is an error if the two schemas do not _match_. +To match, one of the following must hold: + * both schemas are arrays whose item types match + * both schemas are maps whose value types match + * both schemas are enums whose (unqualified) names match + * both schemas are fixed whose sizes and (unqualified) names match + * both schemas are records with the same (unqualified) name + * either schema is a union + * both schemas have same primitive type + * the writer's schema may be promoted to the reader's as follows: + * int is promotable to long, float, or double + * long is promotable to float or double + * float is promotable to double + * string is promotable to bytes + * bytes is promotable to string +* **if both are records**: + * the ordering of fields may be different: fields are matched by name. + * schemas for fields with the same name in both records are resolved recursively. + * if the writer's record contains a field with a name not present in the reader's record, the writer's value for that field is ignored. + * if the reader's record schema has a field that contains a default value, and writer's schema does not have a field with the same name, then the reader should use the default value from its field. + * if the reader's record schema has a field with no default value, and writer's schema does not have a field with the same name, an error is signalled. +* **if both are enums**: +if the writer's symbol is not present in the reader's enum and the reader has a default value, then that value is used, otherwise an error is signalled. + +* **if both are arrays**: +This resolution algorithm is applied recursively to the reader's and writer's array item schemas. + +* **if both are maps**: +This resolution algorithm is applied recursively to the reader's and writer's value schemas. + +* **if both are unions**: +The first schema in the reader's union that matches the selected writer's union schema is recursively resolved against it. if none match, an error is signalled. + +* **if reader's is a union, but writer's is not** +The first schema in the reader's union that matches the writer's schema is recursively resolved against it. If none match, an error is signalled. + +* **if writer's is a union, but reader's is not** +If the reader's schema matches the selected writer's schema, it is recursively resolved against it. If they do not match, an error is signalled. + +A schema's _doc_ fields are ignored for the purposes of schema resolution. Hence, the _doc_ portion of a schema may be dropped at serialization. + +### Parsing Canonical Form for Schemas {#parsing-canonical-form-for-schemas} +One of the defining characteristics of Avro is that a reader must use the schema used by the writer of the data in order to know how to read the data. This assumption results in a data format that's compact and also amenable to many forms of schema evolution. However, the specification so far has not defined what it means for the reader to have the "same" schema as the writer. Does the schema need to be textually identical? Well, clearly adding or removing some whitespace to a JSON expression does not change its meaning. At the same time, reordering the fields of records clearly does change the meaning. So what does it mean for a reader to have "the same" schema as a writer? + +Parsing Canonical Form is a transformation of a writer's schema that let's us define what it means for two schemas to be "the same" for the purpose of reading data written against the schema. It is called Parsing Canonical Form because the transformations strip away parts of the schema, like "doc" attributes, that are irrelevant to readers trying to parse incoming data. It is called Canonical Form because the transformations normalize the JSON text (such as the order of attributes) in a way that eliminates unimportant differences between schemas. If the Parsing Canonical Forms of two different schemas are textually equal, then those schemas are "the same" as far as any reader is concerned, i.e., there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writing using the other original schema. (We sketch a proof of this property in a companion document.) + +The next subsection specifies the transformations that define Parsing Canonical Form. But with a well-defined canonical form, it can be convenient to go one step further, transforming these canonical forms into simple integers ("fingerprints") that can be used to uniquely identify schemas. The subsection after next recommends some standard practices for generating such fingerprints. + +#### Transforming into Parsing Canonical Form +Assuming an input schema (in JSON form) that's already UTF-8 text for a _valid_ Avro schema (including all quotes as required by JSON), the following transformations will produce its Parsing Canonical Form: + +* [PRIMITIVES] Convert primitive schemas to their simple form (e.g., int instead of `{"type":"int"}`). +* [FULLNAMES] Replace short names with fullnames, using applicable namespaces to do so. Then eliminate namespace attributes, which are now redundant. +* [STRIP] Keep only attributes that are relevant to parsing data, which are: _type_, _name_, _fields_, _symbols_, _items_, _values_, _size_. Strip all others (e.g., _doc_ and _aliases_). +* [ORDER] Order the appearance of fields of JSON objects as follows: _name_, _type_, _fields_, _symbols_, _items_, _values_, _size_. For example, if an object has _type_, _name_, and _size_ fields, then the _name_ field should appear first, followed by the _type_ and then the _size_ fields. +* [STRINGS] For all JSON string literals in the schema text, replace any escaped characters (e.g., \uXXXX escapes) with their UTF-8 equivalents. +* [INTEGERS] Eliminate quotes around and any leading zeros in front of JSON integer literals (which appear in the _size_ attributes of _fixed_ schemas). +* [WHITESPACE] Eliminate all whitespace in JSON outside of string literals. + +#### Schema Fingerprints {#schema-fingerprints} +"[A] fingerprinting algorithm is a procedure that maps an arbitrarily large data item (such as a computer file) to a much shorter bit string, its fingerprint, that uniquely identifies the original data for all practical purposes" (quoted from [Wikipedia](https://en.wikipedia.org/wiki/Fingerprint_(computing))). In the Avro context, fingerprints of Parsing Canonical Form can be useful in a number of applications; for example, to cache encoder and decoder objects, to tag data items with a short substitute for the writer's full schema, and to quickly negotiate common-case schemas between readers and writers. + +In designing fingerprinting algorithms, there is a fundamental trade-off between the length of the fingerprint and the probability of collisions. To help application designers find appropriate points within this trade-off space, while encouraging interoperability and ease of implementation, we recommend using one of the following three algorithms when fingerprinting Avro schemas: + +* When applications can tolerate longer fingerprints, we recommend using the [SHA-256 digest algorithm](https://en.wikipedia.org/wiki/SHA-2) to generate 256-bit fingerprints of Parsing Canonical Forms. Most languages today have SHA-256 implementations in their libraries. +* At the opposite extreme, the smallest fingerprint we recommend is a 64-bit [Rabin fingerprint](https://en.wikipedia.org/wiki/Rabin_fingerprint). Below, we provide pseudo-code for this algorithm that can be easily translated into any programming language. 64-bit fingerprints should guarantee uniqueness for schema caches of up to a million entries (for such a cache, the chance of a collision is 3E-8). We don't recommend shorter fingerprints, as the chances of collisions is too great (for example, with 32-bit fingerprints, a cache with as few as 100,000 schemas has a 50% chance of having a collision). +* Between these two extremes, we recommend using the [MD5 message digest](https://en.wikipedia.org/wiki/MD5) to generate 128-bit fingerprints. These make sense only where very large numbers of schemas are being manipulated (tens of millions); otherwise, 64-bit fingerprints should be sufficient. As with SHA-256, MD5 implementations are found in most libraries today. + +These fingerprints are not meant to provide any security guarantees, even the longer SHA-256-based ones. Most Avro applications should be surrounded by security measures that prevent attackers from writing random data and otherwise interfering with the consumers of schemas. We recommend that these surrounding mechanisms be used to prevent collision and pre-image attacks (i.e., "forgery") on schema fingerprints, rather than relying on the security properties of the fingerprints themselves. + +Rabin fingerprints are [cyclic redundancy checks](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) computed using irreducible polynomials. In the style of the Appendix of [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt) (pg 10), which defines the CRC-32 algorithm, here's our definition of the 64-bit AVRO fingerprinting algorithm: +```java +long fingerprint64(byte[] buf) { + if (FP_TABLE == null) initFPTable(); + long fp = EMPTY; + for (int i = 0; i < buf.length; i++) + fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff]; + return fp; +} + +static long EMPTY = 0xc15d213aa4d7a795L; +static long[] FP_TABLE = null; + +void initFPTable() { + FP_TABLE = new long[256]; + for (int i = 0; i < 256; i++) { + long fp = i; + for (int j = 0; j < 8; j++) + fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L)); + FP_TABLE[i] = fp; + } +} +``` + +Readers interested in the mathematics behind this algorithm may want to read [Chapter 14 of the Second Edition of Hacker's Delight](https://books.google.com/books?id=XD9iAwAAQBAJ&pg=PA319). (Unlike RFC-1952 and the book chapter, we prepend a single one bit to messages. We do this because CRCs ignore leading zero bits, which can be problematic. Our code prepends a one-bit by initializing fingerprints using EMPTY, rather than initializing using zero as in RFC-1952 and the book chapter.) + +## Logical Types +A logical type is an Avro primitive or complex type with extra attributes to represent a derived type. The attribute `logicalType` must always be present for a logical type, and is a string with the name of one of the logical types listed later in this section. Other attributes may be defined for particular logical types. + +A logical type is always serialized using its underlying Avro type so that values are encoded in exactly the same way as the equivalent Avro type that does not have a `logicalType` attribute. Language implementations may choose to represent logical types with an appropriate native type, although this is not required. + +Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type. + +### Decimal +The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled × 10-scale_. + +A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute. + +The following attributes are supported: + +* _scale_, a JSON integer representing the scale (optional). If not specified the scale is 0. +* _precision_, a JSON integer representing the (maximum) precision of decimals stored in this type (required). +For example, the following schema represents decimal numbers with a maximum precision of 4 and a scale of 2: +```json +{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 +} +``` +Precision must be a positive integer greater than zero. If the underlying type is a _fixed_, then the precision is limited by its size. An array of length n can store at most _floor(log10(28 × n - 1 - 1))_ base-10 digits of precision. + +Scale must be zero or a positive integer less than or equal to the precision. + +For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match. + +**alternative** + +As it's not always possible to fix scale and precision in advance for a decimal field, `big-decimal` is another `decimal` logical type restrict to Avro _bytes_. + +_Currently only available in Java and Rust_. + +```json +{ + "type": "bytes", + "logicalType": "big-decimal" +} +``` +Here, as scale property is stored in value itself it needs more bytes than preceding `decimal` type, but it allows more flexibility. + +### UUID + +The `uuid` logical type represents a random generated universally unique identifier (UUID). + +A `uuid` logical type annotates an Avro `string` or `fixed` of length 16. Both the string and `fixed` byte layout have to conform with [RFC-4122](https://www.ietf.org/rfc/rfc4122.txt). + +The following schemas represent a uuid: + +```json +{ + "type": "string", + "logicalType": "uuid" +} +``` + +```json +{ + "type": "fixed", + "size": 16, + "logicalType": "uuid" +} +``` + +### Date +The `date` logical type represents a date within the calendar, with no reference to a particular time zone or time of day. + +A `date` logical type annotates an Avro `int`, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar). + +The following schema represents a date: +```json +{ + "type": "int", + "logicalType": "date" +} +``` + +### Time (millisecond precision) {#time_ms} +The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. + +A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000. + +### Time (microsecond precision) +The `time-micros` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond. + +A `time-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds after midnight, 00:00:00.000000. + +### Timestamps {#timestamps} + +The `timestamp-{millis,micros,nanos}` logical type represents an instant on the global timeline, independent of a particular time zone or calendar. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +- `timestamp-millis`: logical type annotates an Avro `long`, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000. +- `timestamp-micros`: logical type annotates an Avro `long`, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000. +- `timestamp-nanos`: logical type annotates an Avro `long`, where the long stores the number of nanoseconds from the unix epoch, 1 January 1970 00:00:00.000000000. + +Example: Given an event at noon local time (12:00) on January 1, 2000, in Helsinki where the local time was two hours east of UTC (UTC+2). The timestamp is first shifted to UTC 2000-01-01T10:00:00 and that is then converted to Avro long 946720800000 (milliseconds) and written. + +### Local Timestamps {#local_timestamp} + +The `local-timestamp-{millis,micros,nanos}` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local. + +- `local-timestamp-millis`: logical type annotates an Avro `long`, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. +- `local-timestamp-micros`: logical type annotates an Avro `long`, where the long stores the number of microseconds, from 1 January 1970 00:00:00.000000. +- `local-timestamp-nanos`: logical type annotates an Avro `long`, where the long stores the number of nanoseconds, from 1 January 1970 00:00:00.000000000. + +Example: Given an event at noon local time (12:00) on January 1, 2000, in Helsinki where the local time was two hours east of UTC (UTC+2). The timestamp is converted to Avro long 946728000000 (milliseconds) and then written. + +### Duration +The `duration` logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. + +A `duration` logical type annotates Avro `fixed` type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds. diff --git a/doc/content/en/docs/1.12.0/_index.md b/doc/content/en/docs/1.12.0/_index.md new file mode 100644 index 00000000000..9eae28efb30 --- /dev/null +++ b/doc/content/en/docs/1.12.0/_index.md @@ -0,0 +1,59 @@ +--- +title: "Apache Avroâ„ĸ 1.12.0 Documentation" +linkTitle: "1.12.0" +type: docs +weight: -1120 +--- + + + +## Introduction + +Apache Avroâ„ĸ is a data serialization system. + +Avro provides: + +* Rich data structures. +* A compact, fast, binary data format. +* A container file, to store persistent data. +* Remote procedure call (RPC). +* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation as an optional optimization, only worth implementing for statically typed languages. + +## Schemas + +Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing. + +When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present. + +When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since both client and server both have the other's full schema, correspondence between same named fields, missing fields, extra fields, etc. can all be easily resolved. + +Avro schemas are defined with JSON . This facilitates implementation in languages that already have JSON libraries. + +## Comparison with other systems + +Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects. + +* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. +* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. +* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names. + + diff --git a/doc/content/en/docs/1.12.0/api-c++.md b/doc/content/en/docs/1.12.0/api-c++.md new file mode 100644 index 00000000000..0ee54696c07 --- /dev/null +++ b/doc/content/en/docs/1.12.0/api-c++.md @@ -0,0 +1,29 @@ +--- +title: "C++ API" +linkTitle: "C++ API" +weight: 102 +manualLink: /docs/1.12.0/api/cpp/html/ +--- + + + +The C++ API documentation can be found here. diff --git a/doc/content/en/docs/1.12.0/api-c.md b/doc/content/en/docs/1.12.0/api-c.md new file mode 100644 index 00000000000..739f7758f58 --- /dev/null +++ b/doc/content/en/docs/1.12.0/api-c.md @@ -0,0 +1,29 @@ +--- +title: "C API" +linkTitle: "C API" +weight: 101 +manualLink: /docs/1.12.0/api/c/ +--- + + + +The C API documentation can be found here. diff --git a/doc/content/en/docs/1.12.0/api-csharp.md b/doc/content/en/docs/1.12.0/api-csharp.md new file mode 100644 index 00000000000..30e4eedb0ae --- /dev/null +++ b/doc/content/en/docs/1.12.0/api-csharp.md @@ -0,0 +1,29 @@ +--- +title: "C# API" +linkTitle: "C# API" +weight: 103 +manualLink: /docs/1.12.0/api/csharp/html/ +--- + + + +The C# API documentation can be found here. diff --git a/doc/content/en/docs/1.12.0/api-java.md b/doc/content/en/docs/1.12.0/api-java.md new file mode 100644 index 00000000000..e1478755095 --- /dev/null +++ b/doc/content/en/docs/1.12.0/api-java.md @@ -0,0 +1,29 @@ +--- +title: "Java API" +linkTitle: "Java API" +weight: 100 +manualLink: /docs/1.12.0/api/java/ +--- + + + +The Javadocs can be found here. diff --git a/doc/content/en/docs/1.12.0/api-py.md b/doc/content/en/docs/1.12.0/api-py.md new file mode 100644 index 00000000000..94f54950095 --- /dev/null +++ b/doc/content/en/docs/1.12.0/api-py.md @@ -0,0 +1,29 @@ +--- +title: "Python API" +linkTitle: "Python API" +weight: 104 +manualLink: /docs/1.12.0/api/py/html/ +--- + + + +The Python API documentation can be found here. diff --git a/doc/content/en/docs/1.12.0/trevni/css/maven-base.css b/doc/content/en/docs/1.12.0/trevni/css/maven-base.css new file mode 100644 index 00000000000..45dc441c914 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/css/maven-base.css @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +body { + margin: 0px; + padding: 0px; +} +table { + padding:0px; + width: 100%; + margin-left: -2px; + margin-right: -2px; +} +acronym { + cursor: help; + border-bottom: 1px dotted #feb; +} +table.bodyTable th, table.bodyTable td { + padding: 2px 4px 2px 4px; + vertical-align: top; +} +div.clear { + clear:both; + visibility: hidden; +} +div.clear hr { + display: none; +} +#bannerLeft, #bannerRight { + font-size: xx-large; + font-weight: bold; +} +#bannerLeft img, #bannerRight img { + margin: 0px; +} +.xleft, #bannerLeft img { + float:left; +} +.xright, #bannerRight { + float:right; +} +#banner { + padding: 0px; +} +#breadcrumbs { + padding: 3px 10px 3px 10px; +} +#leftColumn { + width: 170px; + float:left; + overflow: auto; +} +#bodyColumn { + margin-right: 1.5em; + margin-left: 197px; +} +#legend { + padding: 8px 0 8px 0; +} +#navcolumn { + padding: 8px 4px 0 8px; +} +#navcolumn h5 { + margin: 0; + padding: 0; + font-size: small; +} +#navcolumn ul { + margin: 0; + padding: 0; + font-size: small; +} +#navcolumn li { + list-style-type: none; + background-image: none; + background-repeat: no-repeat; + background-position: 0 0.4em; + padding-left: 16px; + list-style-position: outside; + line-height: 1.2em; + font-size: smaller; +} +#navcolumn li.expanded { + background-image: url(../images/expanded.gif); +} +#navcolumn li.collapsed { + background-image: url(../images/collapsed.gif); +} +#navcolumn li.none { + text-indent: -1em; + margin-left: 1em; +} +#poweredBy { + text-align: center; +} +#navcolumn img { + margin-top: 10px; + margin-bottom: 3px; +} +#poweredBy img { + display:block; + margin: 20px 0 20px 17px; +} +#search img { + margin: 0px; + display: block; +} +#search #q, #search #btnG { + border: 1px solid #999; + margin-bottom:10px; +} +#search form { + margin: 0px; +} +#lastPublished { + font-size: x-small; +} +.navSection { + margin-bottom: 2px; + padding: 8px; +} +.navSectionHead { + font-weight: bold; + font-size: x-small; +} +.section { + padding: 4px; +} +#footer { + padding: 3px 10px 3px 10px; + font-size: x-small; +} +#breadcrumbs { + font-size: x-small; + margin: 0pt; +} +.source { + padding: 12px; + margin: 1em 7px 1em 7px; +} +.source pre { + margin: 0px; + padding: 0px; +} +#navcolumn img.imageLink, .imageLink { + padding-left: 0px; + padding-bottom: 0px; + padding-top: 0px; + padding-right: 2px; + border: 0px; + margin: 0px; +} diff --git a/doc/content/en/docs/1.12.0/trevni/css/maven-theme.css b/doc/content/en/docs/1.12.0/trevni/css/maven-theme.css new file mode 100644 index 00000000000..d3407e8ba8c --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/css/maven-theme.css @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +body { + padding: 0px 0px 10px 0px; +} +body, td, select, input, li{ + font-family: Verdana, Helvetica, Arial, sans-serif; + font-size: 13px; +} +code{ + font-family: Courier, monospace; + font-size: 13px; +} +a { + text-decoration: none; +} +a:link { + color:#36a; +} +a:visited { + color:#47a; +} +a:active, a:hover { + color:#69c; +} +#legend li.externalLink { + background: url(../images/external.png) left top no-repeat; + padding-left: 18px; +} +a.externalLink, a.externalLink:link, a.externalLink:visited, a.externalLink:active, a.externalLink:hover { + background: url(../images/external.png) right center no-repeat; + padding-right: 18px; +} +#legend li.newWindow { + background: url(../images/newwindow.png) left top no-repeat; + padding-left: 18px; +} +a.newWindow, a.newWindow:link, a.newWindow:visited, a.newWindow:active, a.newWindow:hover { + background: url(../images/newwindow.png) right center no-repeat; + padding-right: 18px; +} +h2 { + padding: 4px 4px 4px 6px; + border: 1px solid #999; + color: #900; + background-color: #ddd; + font-weight:900; + font-size: x-large; +} +h3 { + padding: 4px 4px 4px 6px; + border: 1px solid #aaa; + color: #900; + background-color: #eee; + font-weight: normal; + font-size: large; +} +h4 { + padding: 4px 4px 4px 6px; + border: 1px solid #bbb; + color: #900; + background-color: #fff; + font-weight: normal; + font-size: large; +} +h5 { + padding: 4px 4px 4px 6px; + color: #900; + font-size: medium; +} +p { + line-height: 1.3em; + font-size: small; +} +#breadcrumbs { + border-top: 1px solid #aaa; + border-bottom: 1px solid #aaa; + background-color: #ccc; +} +#leftColumn { + margin: 10px 0 0 5px; + border: 1px solid #999; + background-color: #eee; + padding-bottom: 3px; /* IE-9 scrollbar-fix */ +} +#navcolumn h5 { + font-size: smaller; + border-bottom: 1px solid #aaaaaa; + padding-top: 2px; + color: #000; +} + +table.bodyTable th { + color: white; + background-color: #bbb; + text-align: left; + font-weight: bold; +} + +table.bodyTable th, table.bodyTable td { + font-size: 1em; +} + +table.bodyTable tr.a { + background-color: #ddd; +} + +table.bodyTable tr.b { + background-color: #eee; +} + +.source { + border: 1px solid #999; +} +dl { + padding: 4px 4px 4px 6px; + border: 1px solid #aaa; + background-color: #ffc; +} +dt { + color: #900; +} +#organizationLogo img, #projectLogo img, #projectLogo span{ + margin: 8px; +} +#banner { + border-bottom: 1px solid #fff; +} +.errormark, .warningmark, .donemark, .infomark { + background: url(../images/icon_error_sml.gif) no-repeat; +} + +.warningmark { + background-image: url(../images/icon_warning_sml.gif); +} + +.donemark { + background-image: url(../images/icon_success_sml.gif); +} + +.infomark { + background-image: url(../images/icon_info_sml.gif); +} + diff --git a/doc/content/en/docs/1.12.0/trevni/css/print.css b/doc/content/en/docs/1.12.0/trevni/css/print.css new file mode 100644 index 00000000000..18fcbad7083 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/css/print.css @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#banner, #footer, #leftcol, #breadcrumbs, .docs #toc, .docs .courtesylinks, #leftColumn, #navColumn { + display: none !important; +} +#bodyColumn, body.docs div.docs { + margin: 0 !important; + border: none !important +} diff --git a/doc/content/en/docs/1.12.0/trevni/css/site.css b/doc/content/en/docs/1.12.0/trevni/css/site.css new file mode 100644 index 00000000000..055e7e286ad --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/css/site.css @@ -0,0 +1 @@ +/* You can override this file with your own styles */ \ No newline at end of file diff --git a/doc/content/en/docs/1.12.0/trevni/dependencies.html b/doc/content/en/docs/1.12.0/trevni/dependencies.html new file mode 100644 index 00000000000..f5fb35c4e03 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/dependencies.html @@ -0,0 +1,503 @@ + + + + + + + + Trevni Java – Project Dependencies + + + + + + + + + +
+
+
+

Project Dependencies

+

compile

+

The following is a list of compile dependencies for this project. These dependencies are required to compile and run the application:

+ + + + + + + + + + + + +
GroupIdArtifactIdVersionTypeLicenses
org.slf4jslf4j-api2.0.13jarMIT License
+

test

+

The following is a list of test dependencies for this project. These dependencies are only required to compile and run unit tests for the application:

+ + + + + + + + + + + + + + + + + + + + + + + + +
GroupIdArtifactIdVersionTypeLicenses
org.junit.jupiterjunit-jupiter5.10.3jarEclipse Public License v2.0
org.junit.vintagejunit-vintage-engine5.10.3jarEclipse Public License v2.0
org.slf4jslf4j-simple2.0.13jarMIT License
+

Project Transitive Dependencies

+

The following is a list of transitive dependencies for this project. Transitive dependencies are the dependencies of the project dependencies.

+

test

+

The following is a list of test dependencies for this project. These dependencies are only required to compile and run unit tests for the application:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GroupIdArtifactIdVersionTypeLicenses
junitjunit4.13.2jarEclipse Public License 1.0
org.apiguardianapiguardian-api1.1.2jarThe Apache License, Version 2.0
org.hamcresthamcrest-core1.3jarNew BSD License
org.junit.jupiterjunit-jupiter-api5.10.3jarEclipse Public License v2.0
org.junit.jupiterjunit-jupiter-engine5.10.3jarEclipse Public License v2.0
org.junit.jupiterjunit-jupiter-params5.10.3jarEclipse Public License v2.0
org.junit.platformjunit-platform-commons1.10.3jarEclipse Public License v2.0
org.junit.platformjunit-platform-engine1.10.3jarEclipse Public License v2.0
org.opentest4jopentest4j1.3.0jarThe Apache License, Version 2.0
+

Project Dependency Graph

+

Dependency Tree

+
+

Licenses

+

The Apache License, Version 2.0: org.apiguardian:apiguardian-api, org.opentest4j:opentest4j

+

Eclipse Public License 1.0: JUnit

+

MIT License: SLF4J API Module, SLF4J Simple Provider

+

Apache-2.0: Trevni Java

+

Eclipse Public License v2.0: JUnit Jupiter (Aggregator), JUnit Jupiter API, JUnit Jupiter Engine, JUnit Jupiter Params, JUnit Platform Commons, JUnit Platform Engine API, JUnit Vintage Engine

+

New BSD License: Hamcrest Core

+

Dependency File Details

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FilenameSizeEntriesClassesPackagesJava VersionDebug Information
junit-4.13.2.jar384.6 kB389350321.5Yes
apiguardian-api-1.1.2.jar6.8 kB9329Yes
hamcrest-core-1.3.jar45 kB524531.5Yes
junit-jupiter-5.10.3.jar6.4 kB5119No
junit-jupiter-api-5.10.3.jar211.4 kB19818389Yes
junit-jupiter-engine-5.10.3.jar244.7 kB14713099Yes
junit-jupiter-params-5.10.3.jar586 kB381347229Yes
junit-platform-commons-1.10.3.jar106.2 kB644479Yes
junit-platform-engine-1.10.3.jar204.8 kB153136109Yes
junit-vintage-engine-5.10.3.jar67.5 kB493569Yes
opentest4j-1.3.0.jar14.3 kB15929Yes
slf4j-api-2.0.13.jar68.6 kB705559Yes
slf4j-simple-2.0.13.jar15.7 kB22729Yes
TotalSizeEntriesClassesPackagesJava VersionDebug Information
132 MB15541345109912
compile: 1compile: 68.6 kBcompile: 70compile: 55compile: 59compile: 1
test: 12test: 1.9 MBtest: 1484test: 1290test: 1049test: 11
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/dependency-convergence.html b/doc/content/en/docs/1.12.0/trevni/dependency-convergence.html new file mode 100644 index 00000000000..3b5a826a7eb --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/dependency-convergence.html @@ -0,0 +1,339 @@ + + + + + + + + Trevni Java – Reactor Dependency Convergence + + + + + + + + + +
+
+
+

Reactor Dependency Convergence

+ + + +
+ Legend: +
[Error]At least one dependency has a differing version of the dependency or has SNAPSHOT dependencies.

+ + + + + + + + + + + + + + + + + + + + + +
+ Statistics: +
Number of modules:4
Number of dependencies (NOD):114
Number of unique artifacts (NOA):131
Number of version-conflicting artifacts (NOC):12
Number of SNAPSHOT artifacts (NOS):0
Convergence (NOD/NOA):[Error] 87 %
Ready for release (100% convergence and no SNAPSHOTS):[Error] Error
You do not have 100% convergence.
+

Dependencies used in modules

+

ch.qos.reload4j:reload4j

+ + + +
[Error] + + + + + + +
1.2.19 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.slf4j:slf4j-reload4j:jar:1.7.36:provided
             \- ch.qos.reload4j:reload4j:jar:1.2.19:provided

1.2.22 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       +- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
       |  +- ch.qos.reload4j:reload4j:jar:1.2.22:provided
       |  \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
       |     \- ch.qos.reload4j:reload4j:jar:1.2.22:provided
       \- org.apache.hadoop:hadoop-mapreduce-client-core:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-yarn-common:jar:3.3.6:provided
             \- ch.qos.reload4j:reload4j:jar:1.2.22:provided

+

com.nimbusds:nimbus-jose-jwt

+ + + +
[Error] + + + + + + +
3.10 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
             \- org.apache.kerby:kerb-simplekdc:jar:1.0.1:provided
                \- org.apache.kerby:kerb-client:jar:1.0.1:provided
                   \- org.apache.kerby:token-provider:jar:1.0.1:provided
                      \- com.nimbusds:nimbus-jose-jwt:jar:3.10:provided

9.8.1 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
             \- com.nimbusds:nimbus-jose-jwt:jar:9.8.1:provided

+

commons-codec:commons-codec

+ + + +
[Error] + + + + + + + + + +
1.11 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.httpcomponents:httpclient:jar:4.5.13:provided
             \- commons-codec:commons-codec:jar:1.11:provided

1.15 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       +- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
       |  +- commons-codec:commons-codec:jar:1.15:provided
       |  \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
       |     \- commons-codec:commons-codec:jar:1.15:provided
       \- org.apache.hadoop:hadoop-mapreduce-client-core:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-yarn-common:jar:3.3.6:provided
             \- commons-codec:commons-codec:jar:1.15:provided

1.17.0 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.avro:trevni-core:jar:1.12.0:compile
       \- org.apache.commons:commons-compress:jar:1.26.2:compile
          \- commons-codec:commons-codec:jar:1.17.0:compile

+

commons-io:commons-io

+ + + +
[Error] + + + + + + + + + +
2.16.1 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.avro:trevni-core:jar:1.12.0:compile
       \- org.apache.commons:commons-compress:jar:1.26.2:compile
          \- commons-io:commons-io:jar:2.16.1:compile

2.5 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
             \- org.apache.kerby:kerb-simplekdc:jar:1.0.1:provided
                \- org.apache.kerby:kerb-client:jar:1.0.1:provided
                   \- org.apache.kerby:kerb-common:jar:1.0.1:provided
                      \- commons-io:commons-io:jar:2.5:provided

2.8.0 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       +- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
       |  \- commons-io:commons-io:jar:2.8.0:provided
       \- org.apache.hadoop:hadoop-mapreduce-client-core:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-yarn-common:jar:3.3.6:provided
             \- commons-io:commons-io:jar:2.8.0:provided

+

commons-logging:commons-logging

+ + + +
[Error] + + + + + + +
1.1.3 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- commons-logging:commons-logging:jar:1.1.3:provided

1.2 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          +- org.apache.httpcomponents:httpclient:jar:4.5.13:provided
          |  \- commons-logging:commons-logging:jar:1.2:provided
          +- commons-beanutils:commons-beanutils:jar:1.9.4:provided
          |  \- commons-logging:commons-logging:jar:1.2:provided
          \- org.apache.commons:commons-configuration2:jar:2.8.0:provided
             \- commons-logging:commons-logging:jar:1.2:provided

+

jakarta.activation:jakarta.activation-api

+ + + +
[Error] + + + + + + +
1.2.1 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- jakarta.activation:jakarta.activation-api:jar:1.2.1:provided

1.2.2 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-mapreduce-client-core:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-yarn-common:jar:3.3.6:provided
             \- com.fasterxml.jackson.module:jackson-module-jaxb-annotations:jar:2.17.2:provided
                +- jakarta.xml.bind:jakarta.xml.bind-api:jar:2.3.3:provided
                |  \- jakarta.activation:jakarta.activation-api:jar:1.2.2:provided
                \- jakarta.activation:jakarta.activation-api:jar:1.2.2:provided

+

org.apache.avro:avro

+ + + +
[Error] + + + + + + +
1.12.0 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    +- org.apache.avro:avro-mapred:jar:1.12.0:compile
    |  +- org.apache.avro:avro-ipc:jar:1.12.0:compile
    |  |  \- org.apache.avro:avro:jar:1.12.0:compile
    |  \- org.apache.avro:avro-ipc-jetty:jar:1.12.0:compile
    |     \- org.apache.avro:avro:jar:1.12.0:compile
    \- org.apache.avro:avro:jar:1.12.0:compile

1.7.7 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.avro:avro:jar:1.7.7:provided

+

org.apache.commons:commons-text

+ + + +
[Error] + + + + + + +
1.10.0 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.commons:commons-text:jar:1.10.0:provided

1.9 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.commons:commons-configuration2:jar:2.8.0:provided
             \- org.apache.commons:commons-text:jar:1.9:provided

+

org.codehaus.woodstox:stax2-api

+ + + +
[Error] + + + + + + +
4.2 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- com.fasterxml.woodstox:woodstox-core:jar:5.4.0:provided
             \- org.codehaus.woodstox:stax2-api:jar:4.2:provided

4.2.1 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.codehaus.woodstox:stax2-api:jar:4.2.1:provided

+

org.eclipse.jetty:jetty-http

+ + + +
[Error] + + + + + + +
9.4.51.v20230217 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-yarn-client:jar:3.3.6:provided
          \- org.eclipse.jetty.websocket:websocket-client:jar:9.4.51.v20230217:provided
             \- org.eclipse.jetty:jetty-client:jar:9.4.51.v20230217:provided
                \- org.eclipse.jetty:jetty-http:jar:9.4.51.v20230217:provided

9.4.55.v20240627 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.avro:avro-mapred:jar:1.12.0:compile
       \- org.apache.avro:avro-ipc-jetty:jar:1.12.0:compile
          \- org.eclipse.jetty:jetty-server:jar:9.4.55.v20240627:compile
             \- org.eclipse.jetty:jetty-http:jar:9.4.55.v20240627:compile

+

org.eclipse.jetty:jetty-io

+ + + +
[Error] + + + + + + +
9.4.51.v20230217 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-yarn-client:jar:3.3.6:provided
          \- org.eclipse.jetty.websocket:websocket-client:jar:9.4.51.v20230217:provided
             +- org.eclipse.jetty:jetty-client:jar:9.4.51.v20230217:provided
             |  \- org.eclipse.jetty:jetty-io:jar:9.4.51.v20230217:provided
             +- org.eclipse.jetty:jetty-io:jar:9.4.51.v20230217:compile
             \- org.eclipse.jetty.websocket:websocket-common:jar:9.4.51.v20230217:provided
                \- org.eclipse.jetty:jetty-io:jar:9.4.51.v20230217:provided

9.4.55.v20240627 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.avro:avro-mapred:jar:1.12.0:compile
       \- org.apache.avro:avro-ipc-jetty:jar:1.12.0:compile
          \- org.eclipse.jetty:jetty-server:jar:9.4.55.v20240627:compile
             +- org.eclipse.jetty:jetty-http:jar:9.4.55.v20240627:compile
             |  \- org.eclipse.jetty:jetty-io:jar:9.4.55.v20240627:compile
             \- org.eclipse.jetty:jetty-io:jar:9.4.55.v20240627:compile

+

org.slf4j:slf4j-api

+ + + +
[Error] + + + + + + + + + + + + + + + +
1.7.22 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- io.dropwizard.metrics:metrics-core:jar:3.2.4:provided
             \- org.slf4j:slf4j-api:jar:1.7.22:provided

1.7.25 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          +- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
          |  \- org.apache.kerby:kerb-simplekdc:jar:1.0.1:provided
          |     \- org.apache.kerby:kerb-client:jar:1.0.1:provided
          |        \- org.apache.kerby:kerby-config:jar:1.0.1:provided
          |           \- org.slf4j:slf4j-api:jar:1.7.25:provided
          \- org.apache.kerby:kerb-core:jar:1.0.1:provided
             \- org.apache.kerby:kerby-pkix:jar:1.0.1:provided
                \- org.slf4j:slf4j-api:jar:1.7.25:provided

1.7.30 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.avro:avro-mapred:jar:1.12.0:compile
       \- org.apache.avro:avro-ipc:jar:1.12.0:compile
          \- org.apache.velocity:velocity-engine-core:jar:2.3:compile
             \- org.slf4j:slf4j-api:jar:1.7.30:compile

1.7.36 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       +- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
       |  +- org.slf4j:slf4j-api:jar:1.7.36:provided
       |  +- org.slf4j:slf4j-reload4j:jar:1.7.36:provided
       |  |  \- org.slf4j:slf4j-api:jar:1.7.36:provided
       |  \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
       |     \- org.slf4j:slf4j-api:jar:1.7.36:provided
       +- org.apache.hadoop:hadoop-mapreduce-client-core:jar:3.3.6:provided
       |  +- org.apache.hadoop:hadoop-yarn-common:jar:3.3.6:provided
       |  |  \- org.slf4j:slf4j-api:jar:1.7.36:provided
       |  \- org.slf4j:slf4j-api:jar:1.7.36:provided
       \- org.apache.hadoop:hadoop-mapreduce-client-jobclient:jar:3.3.6:provided
          +- org.apache.hadoop:hadoop-mapreduce-client-common:jar:3.3.6:provided
          |  \- org.slf4j:slf4j-api:jar:1.7.36:provided
          \- org.slf4j:slf4j-api:jar:1.7.36:provided

2.0.13 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    +- org.apache.avro:trevni-core:jar:1.12.0:compile
    |  \- org.slf4j:slf4j-api:jar:2.0.13:compile
    +- org.apache.avro:trevni-core:jar:tests:1.12.0:test
    |  \- org.slf4j:slf4j-api:jar:2.0.13:test
    +- org.apache.avro:avro-mapred:jar:1.12.0:compile
    |  +- org.apache.avro:avro-ipc:jar:1.12.0:compile
    |  |  \- org.slf4j:slf4j-api:jar:2.0.13:compile
    |  +- org.apache.avro:avro-ipc-jetty:jar:1.12.0:compile
    |  |  \- org.slf4j:slf4j-api:jar:2.0.13:compile
    |  \- org.slf4j:slf4j-api:jar:2.0.13:compile
    +- org.apache.avro:avro:jar:1.12.0:compile
    |  \- org.slf4j:slf4j-api:jar:2.0.13:compile
    +- org.slf4j:slf4j-api:jar:2.0.13:compile
    \- org.slf4j:slf4j-simple:jar:2.0.13:test
       \- org.slf4j:slf4j-api:jar:2.0.13:test

+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/dependency-info.html b/doc/content/en/docs/1.12.0/trevni/dependency-info.html new file mode 100644 index 00000000000..f43fbc7da7e --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/dependency-info.html @@ -0,0 +1,118 @@ + + + + + + + + Trevni Java – Dependency Information + + + + + + + + + +
+
+
+

Dependency Information

+

Apache Maven

+
+
<dependency>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>trevni-java</artifactId>
+  <version>1.12.0</version>
+  <type>pom</type>
+</dependency>
+

Apache Ivy

+
+
<dependency org="org.apache.avro" name="trevni-java" rev="1.12.0">
+  <artifact name="trevni-java" type="pom" />
+</dependency>
+

Groovy Grape

+
+
@Grapes(
+@Grab(group='org.apache.avro', module='trevni-java', version='1.12.0')
+)
+

Gradle/Grails

+
+
implementation 'org.apache.avro:trevni-java:1.12.0'
+

Scala SBT

+
+
libraryDependencies += "org.apache.avro" % "trevni-java" % "1.12.0"
+

Leiningen

+
+
[org.apache.avro/trevni-java "1.12.0"]
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/dependency-management.html b/doc/content/en/docs/1.12.0/trevni/dependency-management.html new file mode 100644 index 00000000000..310a167e880 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/dependency-management.html @@ -0,0 +1,613 @@ + + + + + + + + Trevni Java – Project Dependency Management + + + + + + + + + +
+
+
+

Project Dependency Management

+

compile

+

The following is a list of compile dependencies in the DependencyManagement of this project. These dependencies can be included in the submodules to compile and run the submodule:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GroupIdArtifactIdVersionTypeLicense
com.fasterxml.jackson.corejackson-annotations2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.corejackson-core2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.corejackson-databind2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.dataformatjackson-dataformat-avro2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.dataformatjackson-dataformat-cbor2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.dataformatjackson-dataformat-csv2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.dataformatjackson-dataformat-ion2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.dataformatjackson-dataformat-properties2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.dataformatjackson-dataformat-protobuf2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.dataformatjackson-dataformat-smile2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.dataformatjackson-dataformat-toml2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.dataformatjackson-dataformat-xml2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.dataformatjackson-dataformat-yaml2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-eclipse-collections2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-guava2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-hibernate42.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-hibernate52.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-hibernate5-jakarta2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-hibernate62.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-hppc2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-jakarta-jsonp2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-jaxrs2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-jdk82.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-joda2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-joda-money2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-json-org2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-jsr3102.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-jsr3532.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.datatypejackson-datatype-pcollections2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rsjackson-jakarta-rs-base2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rsjackson-jakarta-rs-cbor-provider2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rsjackson-jakarta-rs-json-provider2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rsjackson-jakarta-rs-smile-provider2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rsjackson-jakarta-rs-xml-provider2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rsjackson-jakarta-rs-yaml-provider2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrsjackson-jaxrs-base2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrsjackson-jaxrs-cbor-provider2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrsjackson-jaxrs-json-provider2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrsjackson-jaxrs-smile-provider2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrsjackson-jaxrs-xml-provider2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrsjackson-jaxrs-yaml-provider2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jrjackson-jr-all2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jrjackson-jr-annotation-support2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jrjackson-jr-extension-javatime2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jrjackson-jr-objects2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jrjackson-jr-retrofit22.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.jrjackson-jr-stree2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-afterburner2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-android-record2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-blackbird2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-guice2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-guice72.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-jakarta-xmlbind-annotations2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-jaxb-annotations2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-jsonSchema2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-jsonSchema-jakarta2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-kotlin2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-mrbean2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-no-ctor-deser2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-osgi2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-parameter-names2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-paranamer2.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-scala_2.112.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-scala_2.122.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-scala_2.132.17.2jarThe Apache Software License, Version 2.0
com.fasterxml.jackson.modulejackson-module-scala_32.17.2jarThe Apache Software License, Version 2.0
com.github.lubenzstd-jni1.5.6-4jarBSD 2-Clause License
io.grpcgrpc-core1.65.1jarApache 2.0
io.grpcgrpc-netty1.65.1jarApache 2.0
io.grpcgrpc-stub1.65.1jarApache 2.0
javax.servletjavax.servlet-api4.0.1jarCDDL + GPLv2 with classpath exception
net.sf.jopt-simplejopt-simple5.0.4jarThe MIT License
org.apache.commonscommons-compress1.26.2jarApache-2.0
org.apache.commonscommons-lang33.15.0jarApache-2.0
org.apache.hadoophadoop-client3.3.6jarApache License, Version 2.0
org.apache.maven.plugin-toolsmaven-plugin-annotations3.10.2jarApache-2.0
org.apache.velocityvelocity-engine-core2.3jarApache License, Version 2.0
org.eclipse.jettyjetty-server9.4.55.v20240627jarApache Software License - Version 2.0, Eclipse Public License - Version 1.0
org.eclipse.jettyjetty-servlet9.4.55.v20240627jarApache Software License - Version 2.0, Eclipse Public License - Version 1.0
org.eclipse.jettyjetty-util9.4.55.v20240627jarApache Software License - Version 2.0, Eclipse Public License - Version 1.0
org.tukaanixz1.9jarPublic Domain
org.xerial.snappysnappy-java1.1.10.5jarApache-2.0
+

test

+

The following is a list of test dependencies in the DependencyManagement of this project. These dependencies can be included in the submodules to compile and run unit tests for the submodule:

+ + + + + + + + + + + + + + + + + + +
GroupIdArtifactIdVersionTypeLicense
org.hamcresthamcrest-library2.2jarBSD License 3
org.mockitomockito-core5.12.0jarMIT
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/distribution-management.html b/doc/content/en/docs/1.12.0/trevni/distribution-management.html new file mode 100644 index 00000000000..6301bd5c270 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/distribution-management.html @@ -0,0 +1,94 @@ + + + + + + + + Trevni Java – Project Distribution Management + + + + + + + + + +
+
+
+

Overview

+

The following is the distribution management information used by this project.

+

Repository - apache.releases.https

https://repository.apache.org/service/local/staging/deploy/maven2
+

Snapshot Repository - apache.snapshots.https

https://repository.apache.org/content/repositories/snapshots
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/images/close.gif b/doc/content/en/docs/1.12.0/trevni/images/close.gif new file mode 100644 index 00000000000..1c26bbc5264 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/close.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/collapsed.gif b/doc/content/en/docs/1.12.0/trevni/images/collapsed.gif new file mode 100644 index 00000000000..6e710840640 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/collapsed.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/expanded.gif b/doc/content/en/docs/1.12.0/trevni/images/expanded.gif new file mode 100644 index 00000000000..0fef3d89e0d Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/expanded.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/external.png b/doc/content/en/docs/1.12.0/trevni/images/external.png new file mode 100644 index 00000000000..3f999fc88b3 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/external.png differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/icon_error_sml.gif b/doc/content/en/docs/1.12.0/trevni/images/icon_error_sml.gif new file mode 100644 index 00000000000..61132ef2b01 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/icon_error_sml.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/icon_info_sml.gif b/doc/content/en/docs/1.12.0/trevni/images/icon_info_sml.gif new file mode 100644 index 00000000000..c6cb9ad7ce4 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/icon_info_sml.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/icon_success_sml.gif b/doc/content/en/docs/1.12.0/trevni/images/icon_success_sml.gif new file mode 100644 index 00000000000..52e85a430af Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/icon_success_sml.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/icon_warning_sml.gif b/doc/content/en/docs/1.12.0/trevni/images/icon_warning_sml.gif new file mode 100644 index 00000000000..873bbb52cb9 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/icon_warning_sml.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-black.png b/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-black.png new file mode 100644 index 00000000000..919fd0f66a7 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-black.png differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-white.png b/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-white.png new file mode 100644 index 00000000000..7d44c9c2e57 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-white.png differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/logos/maven-feather.png b/doc/content/en/docs/1.12.0/trevni/images/logos/maven-feather.png new file mode 100644 index 00000000000..b5ada836e9e Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/logos/maven-feather.png differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/newwindow.png b/doc/content/en/docs/1.12.0/trevni/images/newwindow.png new file mode 100644 index 00000000000..6287f72bd08 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/newwindow.png differ diff --git a/doc/content/en/docs/1.12.0/trevni/index.html b/doc/content/en/docs/1.12.0/trevni/index.html new file mode 100644 index 00000000000..b28fd9cfdce --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/index.html @@ -0,0 +1,107 @@ + + + + + + + + Trevni Java – About + + + + + + + + + +
+
+
+

About Trevni Java

+

Trevni Java

+

Project Modules

+

This project has declared the following modules:

+ + + + + + + + + + + + +
NameDescription
Trevni Java CoreTrevni Java Core
Trevni Java AvroTrevni Java Avro
Trevni SpecificationTrevni Java
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/issue-management.html b/doc/content/en/docs/1.12.0/trevni/issue-management.html new file mode 100644 index 00000000000..fa89c231276 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/issue-management.html @@ -0,0 +1,96 @@ + + + + + + + + Trevni Java – Issue Management + + + + + + + + + +
+
+
+

Overview

+

This project uses JIRA.

+

Issue Management

+

Issues, bugs, and feature requests should be submitted to the following issue management system for this project.

+
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/licenses.html b/doc/content/en/docs/1.12.0/trevni/licenses.html new file mode 100644 index 00000000000..13153347619 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/licenses.html @@ -0,0 +1,298 @@ + + + + + + + + Trevni Java – Project Licenses + + + + + + + + + +
+
+
+

Overview

+

Typically the licenses listed for the project are that of the project itself, and not of dependencies.

+

Project Licenses

+

Apache-2.0

+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/mailing-lists.html b/doc/content/en/docs/1.12.0/trevni/mailing-lists.html new file mode 100644 index 00000000000..3ad9d4a9e2b --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/mailing-lists.html @@ -0,0 +1,117 @@ + + + + + + + + Trevni Java – Project Mailing Lists + + + + + + + + + +
+
+
+

Project Mailing Lists

+

These are the mailing lists that have been established for this project. For each list, there is a subscribe, unsubscribe, and an archive link.

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameSubscribeUnsubscribePostArchive
Avro Developer ListSubscribeUnsubscribePostmail-archives.apache.org
Avro Users ListSubscribeUnsubscribePostmail-archives.apache.org
Avro Commits ListSubscribeUnsubscribePostmail-archives.apache.org
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/modules.html b/doc/content/en/docs/1.12.0/trevni/modules.html new file mode 100644 index 00000000000..aa7181309ea --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/modules.html @@ -0,0 +1,105 @@ + + + + + + + + Trevni Java – Project Modules + + + + + + + + + +
+
+
+

Project Modules

+

This project has declared the following modules:

+ + + + + + + + + + + + +
NameDescription
Trevni Java CoreTrevni Java Core
Trevni Java AvroTrevni Java Avro
Trevni SpecificationTrevni Java
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/plugin-management.html b/doc/content/en/docs/1.12.0/trevni/plugin-management.html new file mode 100644 index 00000000000..79b358bc983 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/plugin-management.html @@ -0,0 +1,244 @@ + + + + + + + + Trevni Java – Project Plugin Management + + + + + + + + + +
+
+
+

Project Plugin Management

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GroupIdArtifactIdVersion
com.diffplug.spotlessspotless-maven-plugin2.43.0
org.apache.maven.pluginsmaven-antrun-plugin3.1.0
org.apache.maven.pluginsmaven-assembly-plugin3.6.0
org.apache.maven.pluginsmaven-checkstyle-plugin3.4.0
org.apache.maven.pluginsmaven-clean-plugin3.3.2
org.apache.maven.pluginsmaven-compiler-plugin3.13.0
org.apache.maven.pluginsmaven-dependency-plugin3.6.1
org.apache.maven.pluginsmaven-deploy-plugin3.1.1
org.apache.maven.pluginsmaven-ear-plugin3.3.0
org.apache.maven.pluginsmaven-enforcer-plugin3.5.0
org.apache.maven.pluginsmaven-failsafe-plugin3.2.2
org.apache.maven.pluginsmaven-gpg-plugin3.2.4
org.apache.maven.pluginsmaven-help-plugin3.4.0
org.apache.maven.pluginsmaven-install-plugin3.1.1
org.apache.maven.pluginsmaven-invoker-plugin3.6.0
org.apache.maven.pluginsmaven-jar-plugin3.3.0
org.apache.maven.pluginsmaven-javadoc-plugin3.8.0
org.apache.maven.pluginsmaven-plugin-plugin3.13.1
org.apache.maven.pluginsmaven-plugin-report-plugin3.10.2
org.apache.maven.pluginsmaven-project-info-reports-plugin3.4.5
org.apache.maven.pluginsmaven-release-plugin3.0.1
org.apache.maven.pluginsmaven-remote-resources-plugin3.2.0
org.apache.maven.pluginsmaven-resources-plugin3.3.1
org.apache.maven.pluginsmaven-scm-plugin2.0.1
org.apache.maven.pluginsmaven-scm-publish-plugin3.2.1
org.apache.maven.pluginsmaven-shade-plugin3.6.0
org.apache.maven.pluginsmaven-site-plugin3.12.1
org.apache.maven.pluginsmaven-source-plugin3.3.1
org.apache.maven.pluginsmaven-surefire-plugin3.3.1
org.apache.maven.pluginsmaven-surefire-report-plugin3.2.2
org.apache.maven.pluginsmaven-toolchains-plugin3.2.0
org.apache.maven.pluginsmaven-war-plugin3.4.0
org.apache.ratapache-rat-plugin0.16.1
org.codehaus.mojobuild-helper-maven-plugin3.6.0
org.codehaus.mojoexec-maven-plugin3.3.0
org.cyclonedxcyclonedx-maven-plugin2.8.0
org.javacc.pluginjavacc-maven-plugin3.0.3
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/plugins.html b/doc/content/en/docs/1.12.0/trevni/plugins.html new file mode 100644 index 00000000000..7dfb97d6313 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/plugins.html @@ -0,0 +1,158 @@ + + + + + + + + Trevni Java – Project Plugins + + + + + + + + + +
+
+
+

Project Build Plugins

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GroupIdArtifactIdVersion
com.diffplug.spotlessspotless-maven-plugin2.43.0
org.apache.felixmaven-bundle-plugin5.1.9
org.apache.maven.pluginsmaven-checkstyle-plugin3.4.0
org.apache.maven.pluginsmaven-clean-plugin3.3.2
org.apache.maven.pluginsmaven-deploy-plugin3.1.1
org.apache.maven.pluginsmaven-enforcer-plugin3.5.0
org.apache.maven.pluginsmaven-install-plugin3.1.1
org.apache.maven.pluginsmaven-plugin-plugin3.13.1
org.apache.maven.pluginsmaven-remote-resources-plugin3.2.0
org.apache.maven.pluginsmaven-site-plugin3.12.1
org.apache.maven.pluginsmaven-toolchains-plugin3.2.0
org.apache.ratapache-rat-plugin0.16.1
org.cyclonedxcyclonedx-maven-plugin2.8.0
+

Project Report Plugins

+ + + + + + + + +
GroupIdArtifactIdVersion
org.apache.maven.pluginsmaven-project-info-reports-plugin3.4.5
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/project-info.html b/doc/content/en/docs/1.12.0/trevni/project-info.html new file mode 100644 index 00000000000..df0c1e3ebf8 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/project-info.html @@ -0,0 +1,139 @@ + + + + + + + + Trevni Java – Project Information + + + + + + + + + +
+
+
+

Project Information

+

This document provides an overview of the various documents and links that are part of this project's general information. All of this content is automatically generated by Maven on behalf of the project.

+

Overview

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DocumentDescription
DependenciesThis document lists the project's dependencies and provides information on each dependency.
Dependency ConvergenceThis document presents the convergence of dependency versions across the entire project, and its sub modules.
Dependency InformationThis document describes how to include this project as a dependency using various dependency management tools.
Dependency ManagementThis document lists the dependencies that are defined through dependencyManagement.
Distribution ManagementThis document provides informations on the distribution management of this project.
AboutTrevni Java
Issue ManagementThis document provides information on the issue management system used in this project.
LicensesThis document lists the project license(s).
Mailing ListsThis document provides subscription and archive information for this project's mailing lists.
Project ModulesThis document lists the modules (sub-projects) of this project.
Plugin ManagementThis document lists the plugins that are defined through pluginManagement.
PluginsThis document lists the build plugins and the report plugins used by this project.
Source Code ManagementThis document lists ways to access the online source repository.
SummaryThis document lists other related information of this project
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/scm.html b/doc/content/en/docs/1.12.0/trevni/scm.html new file mode 100644 index 00000000000..c9117f1f2dc --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/scm.html @@ -0,0 +1,106 @@ + + + + + + + + Trevni Java – Source Code Management + + + + + + + + + +
+
+
+

Overview

+

This project uses Git to manage its source code. Instructions on Git use can be found at https://git-scm.com/documentation.

+

Web Browser Access

+

The following is a link to a browsable version of the source repository:

+
+

Anonymous Access

+

The source can be checked out anonymously from Git with this command (See https://git-scm.com/docs/git-clone):

+
+
$ git clone https://github.com/apache/avro/lang/avro-parent/trevni-java
+

Developer Access

+

Only project developers can access the Git tree via this method (See https://git-scm.com/docs/git-clone).

+
+
$ git clone https://github.com/apache/avro/lang/avro-parent/trevni-java
+

Access from Behind a Firewall

+

Refer to the documentation of the SCM used for more information about access behind a firewall.

+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/summary.html b/doc/content/en/docs/1.12.0/trevni/summary.html new file mode 100644 index 00000000000..ee8ac2578cd --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/summary.html @@ -0,0 +1,133 @@ + + + + + + + + Trevni Java – Project Summary + + + + + + + + + +
+
+
+

Project Summary

+

Project Information

+ + + + + + + + + + + + +
FieldValue
NameTrevni Java
DescriptionTrevni Java
Homepagehttps://avro.apache.org/
+

Project Organization

+ + + + + + + + + +
FieldValue
NameThe Apache Software Foundation
URLhttps://www.apache.org/
+

Build Information

+ + + + + + + + + + + + + + + +
FieldValue
GroupIdorg.apache.avro
ArtifactIdtrevni-java
Version1.12.0
Typepom
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/_index.md b/doc/content/en/docs/_index.md new file mode 100755 index 00000000000..db24f656ee1 --- /dev/null +++ b/doc/content/en/docs/_index.md @@ -0,0 +1,56 @@ + +--- +title: "Documentation" +linkTitle: "Documentation" +--- + + + +## Introduction + +Apache Avroâ„ĸ is a data serialization system. + +Avro provides: + +* Rich data structures. +* A compact, fast, binary data format. +* A container file, to store persistent data. +* Remote procedure call (RPC). +* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation as an optional optimization, only worth implementing for statically typed languages. + +## Schemas + +Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overhead, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing. + +When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present. + +When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since both client and server both have the other's full schema, correspondence between same named fields, missing fields, extra fields, etc. can all be easily resolved. + +Avro schemas are defined with JSON . This facilitates implementation in languages that already have JSON libraries. + +## Comparison with other systems + +Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects. + +* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. +* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. +* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names. diff --git a/doc/content/en/project/Articles/_index.md b/doc/content/en/project/Articles/_index.md new file mode 100755 index 00000000000..e30d9ef1d07 --- /dev/null +++ b/doc/content/en/project/Articles/_index.md @@ -0,0 +1,84 @@ +--- +title: "Articles" +linkTitle: "Articles" +weight: 4 +--- + + +** ** + +**Guide to Apache Avro** +Feb 19, 2023, by baeldung. + +https://www.baeldung.com/java-apache-avro + +** ** + +**Apache Avro IDL Schema Support**, +Apr 11, 2022, by Oscar Westra van Holthe - Kind. + +https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support + +** ** + +**Generate random JSON data from an AVRO schema using Java**, +Jan 24, 2022, by Maarten Smeets. + +https://technology.amis.nl/soa/kafka/generate-random-json-data-from-an-avro-schema-using-java/ + +** ** + +**A Gentle (and Practical) Introduction to Apache Avro**, +Dec 22, 2020, by Anton Rodriguez. + +https://dzone.com/articles/gentle-and-practical-introduction-to-apache-avro-part-1 + +** ** + +**Apache Avro – A data serialization system** +Dec 09, 2018, by Dennis Vriend. + +https://binx.io/2018/12/09/apache-avro/ + +** ** + +**Introduction to Apache Avro** +Mar 12, 2016, by Bartosz Konieczny. + +https://www.waitingforcode.com/apache-avro/introduction-to-apache-avro/read + +** ** + +**Reading and Writing Avro Files from the Command Line**, +Mar 17, 2013, by Michael G. Noll. + +https://www.michael-noll.com/blog/2013/03/17/reading-and-writing-avro-files-from-the-command-line/ + +** ** + +**Using Apache Avro** +Jan 25, 2011, by Boris Lublinsky. + +https://www.infoq.com/articles/ApacheAvro/ + + + diff --git a/doc/content/en/project/Committer onboarding guide/_index.md b/doc/content/en/project/Committer onboarding guide/_index.md new file mode 100755 index 00000000000..eb865a42ae2 --- /dev/null +++ b/doc/content/en/project/Committer onboarding guide/_index.md @@ -0,0 +1,48 @@ +--- +title: "Committer onboarding guide" +linkTitle: "Committer onboarding guide" +weight: 7 +--- + + +** ** +For you, the new committer: + +1. File your ICLA and send it to secretary@apache.org +2. Log in to https://whimsy.apache.org; that will confirm a working ASF account +3. You can edit email routing for the account, and add other emails that you own +4. You can directly edit mailing list subscriptions (for example, you might switch them to your ASF account - you can still post from any of your registered emails) +5. Link your GitHub account with your ASF account at https://gitbox.apache.org/ once you see the big green "Merge" button on pull requests, this is working +7. Read the ASF new committer guide: https://www.apache.org/dev/new-committers-guide.html + +** ** + +A committer in JIRA can add a new contributor by following these steps: + +1. Log in to JIRA with your committer credentials. +2. Navigate to the project where you want to add the new contributor. +3. Click on the "People" tab at the top of the page. +4. Click on the "Add People" button. +5. Enter the email address of the new contributor in the "Email Address" field. +6. Select the appropriate role for the new contributor from the "Role" dropdown menu. +7. Click the "Add" button to add the new contributor to the project. +8. An email will be sent to the new contributor asking them to accept the invitation to join the project. diff --git a/doc/content/en/project/Contributors onboarding guide/_index.md b/doc/content/en/project/Contributors onboarding guide/_index.md new file mode 100644 index 00000000000..71d9336979f --- /dev/null +++ b/doc/content/en/project/Contributors onboarding guide/_index.md @@ -0,0 +1,41 @@ +--- +title: "Contributor onboarding guide" +linkTitle: "Contributor onboarding guide" +weight: 8 +aliases: + - /docs/contribution-guidelines +--- + + + + +1. Familiarize yourself with Apache Avro: Before you start contributing to Apache Avro, it's essential to have a good understanding of what Apache Avro is and how it works. You can start by reading the Apache Avro documentation to get an overview of the project's features, use cases, and architecture. + +2. Join the Apache Avro community: Join the Apache Avro mailing lists, IRC channels, and forums to interact with other contributors and users. You can ask questions, discuss ideas, and get feedback on your contributions from experienced contributors. +3. Set up your development environment: To contribute to Apache Avro, you need to set up your development environment. The Apache Avro project uses Git for version control, and Apache Maven for building. You can follow the instructions in the Apache Avro documentation to set up your environment. +4. Choose a contribution: Apache Avro is an open-source project, and there are always new features, bug fixes, and improvements that can be made. You can choose from a wide range of contributions, from documentation updates to code changes. +5. Review existing issues and pull requests: Before you start working on a contribution, it's important to review existing issues and pull requests to avoid duplicating efforts. You can use the Apache Avro issue tracker to search for issues and pull requests related to your contribution. +6. Create a new issue or pull request: If you can't find an existing issue or pull request related to your contribution, you can create a new one. Make sure to provide detailed information about your contribution, including a description of the problem, proposed solution, and any relevant code changes. +7. Work on your contribution: Once you have a clear understanding of the contribution you want to make, you can start working on it. Make sure to follow the Apache Avro coding guidelines and best practices to ensure that your code is of high quality. +8. Submit your contribution: When you're ready to submit your contribution, create a pull request in the Apache Avro GitHub repository. Make sure to include a detailed description of your changes, and any relevant documentation or test cases. +9. Participate in reviews: Once you've submitted your contribution, it will be reviewed by other contributors. You may need to make additional changes based on their feedback before your contribution is accepted. +Celebrate your contribution: Once your contribution has been accepted, celebrate your achievement! You've helped improve Apache Avro and contributed to the open-source community. diff --git a/doc/content/en/project/Credits/_index.md b/doc/content/en/project/Credits/_index.md new file mode 100644 index 00000000000..00c6c91e9d9 --- /dev/null +++ b/doc/content/en/project/Credits/_index.md @@ -0,0 +1,67 @@ +--- +title: "Credits" +linkTitle: "Credits" +weight: 2 +aliases: +- /credits.html +--- + + + +## Apache Avro credits + +### Committers + +Apache Avro's active committers are: + +| **username** | **name** | **organization** | **roles** | **timezone** | +|:-------------|:-------------------------------|:------------------------|:----------------------:|:------------:| +| blue | Ryan Blue | Netflix | spec, java, ruby | -8 | +| brucem | Bruce Mitchener | Army of Bruce | c | +7 | +| busbey | Sean Busbey | Cloudera | java, ruby | -6 | +| cutting | Doug Cutting | Cloudera | spec, java | -8 | +| dcreager | Douglas Creager | RedJack, LLC | c | -5 | +| hammer | Jeff Hammerbacher | Cloudera | python | -8 | +| iemejia | IsmaÃĢl Mejía | Talend | java, docker | +1 | +| kojiromike | Michael A. Smith | Independent | python, docker | -5 | +| massie | Matt Massie | UC Berkeley | c | -8 | +| mgrigorov | Martin Grigorov | Huawei | rust | +2 | +| nielsbasjes | Niels Basjes | Bol.com | java, docker | +1 | +| opwvhk | Oscar Westra van Holthe - Kind | Royal Schiphol Group | spec, java, docker | +1 | +| rskraba | Ryan Skraba | Talend | java, docker | +1 | +| sbanacho | Scott Banachowski | Microsoft | c++ | -8 | +| scottcarey | Scott Carey | RichRelevance | java | -8 | +| sekikn | Kengo Seki | NTT Data | perl, interoperability | +9 | +| sharadag | Sharad Agarwal | InMobi | python | +5.5 | +| thiru | Thiruvalluvan M. G. | VertiCloud | java | +5.5 | +| tjwp | Tim Perkins | Shopify | ruby | -5 | +| tomwhite | Tom White | Cloudera | java | 0 | +------------- + +### Contributors +A list of Avro contributors and their contributions is available from [Jira](http://s.apache.org/AvroFixed) + +### Emeriti +Contributors who are no longer active on Avro are: + +* Philip Zeyliger +* Martin Kleppmann diff --git a/doc/content/en/project/Donate/_index.md b/doc/content/en/project/Donate/_index.md new file mode 100755 index 00000000000..c87561fefc1 --- /dev/null +++ b/doc/content/en/project/Donate/_index.md @@ -0,0 +1,29 @@ +--- +title: "Donate" +linkTitle: "Donate" +weight: 13 +manualLink: https://www.apache.org/foundation/sponsorship.html +--- + + + +If you would like to donate please see the Apache Software Foundation [donation program](https://www.apache.org/foundation/sponsorship.html) diff --git a/doc/content/en/project/Download/_index.md b/doc/content/en/project/Download/_index.md new file mode 100755 index 00000000000..eff8aa3e621 --- /dev/null +++ b/doc/content/en/project/Download/_index.md @@ -0,0 +1,81 @@ +--- +title: "Download" +linkTitle: "Download" +weight: 1 +--- + + + +## Download +Releases may be downloaded from Apache mirrors: [Download](https://www.apache.org/dyn/closer.cgi/avro/) + +The latest release is: Avro {{< avro_version >}} (3.4M, source, [pgp](https://downloads.apache.org/avro/avro-{{< avro_version >}}/avro-src-{{< avro_version >}}.tar.gz.asc), [sha512](https://downloads.apache.org/avro/avro-{{< avro_version >}}/avro-src-{{< avro_version >}}.tar.gz.sha512)) + +* C#: https://www.nuget.org/packages/Apache.Avro/{{< avro_version >}} +* Java: from Maven Central, +* Javascript: https://www.npmjs.com/package/avro-js/v/{{< avro_version >}} +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/{{< avro_version >}} +* Ruby: https://rubygems.org/gems/avro/versions/{{< avro_version >}} + + +## Release Notes +Release notes for Avro releases are available in [Jira](https://issues.apache.org/jira/browse/AVRO?report=com.atlassian.jira.plugin.system.project:changelog-panel#selectedTab=com.atlassian.jira.plugin.system.project%3Achangelog-panel) + +##Verifying a release +It is essential that you verify the integrity of the downloaded files using the PGP signatures or SHA512 checksums. Please read [How to verify downloaded](https://www.apache.org/info/verification.html) files for more information on why you should verify our releases. + +The PGP signatures can be verified using PGP or GPG. First download the [KEYS](https://downloads.apache.org/avro/KEYS) file as well as the .asc signature files for the relevant release packages. Make sure you get these files from the main distribution directory, rather than from a mirror. Then verify the signatures using: + +```shell +% gpg --import KEYS +% gpg --verify downloaded_file.asc downloaded_file +``` + +or + +```shell +% pgpk -a KEYS +% pgpv downloaded_file.asc +``` + +or + +```shell +% pgp -ka KEYS +% pgp downloaded_file.asc +``` +Alternatively, you can verify the hash on the file. + +Hashes can be calculated using GPG: +```shell +% gpg --print-md SHA256 downloaded_file +``` +The output should be compared with the contents of the SHA256 file. Similarly for other hashes (SHA512, SHA1, MD5 etc) which may be provided. + +Windows 7 and later systems should all now have certUtil: +```shell +% certUtil -hashfile pathToFileToCheck +``` +HashAlgorithm choices: _MD2 MD4 MD5 SHA1 SHA256 SHA384 SHA512_ + +Unix-like systems (and macOS) will have a utility called _md5_, _md5sum_ or _shasum_. diff --git a/doc/content/en/project/Events/_index.md b/doc/content/en/project/Events/_index.md new file mode 100755 index 00000000000..7d8646283f0 --- /dev/null +++ b/doc/content/en/project/Events/_index.md @@ -0,0 +1,28 @@ +--- +title: "Events" +linkTitle: "Events" +weight: 12 +--- + + + +Apache Avro members often participate in events organized by the [Apache Software Foundation](https://www.apache.org/events/current-event.html) diff --git a/doc/content/en/project/How to contribute/_index.md b/doc/content/en/project/How to contribute/_index.md new file mode 100755 index 00000000000..6514d7c3627 --- /dev/null +++ b/doc/content/en/project/How to contribute/_index.md @@ -0,0 +1,388 @@ +--- +title: "How to contribute" +linkTitle: "How to contribute" +weight: 5 +--- + + + +## Getting the source code + +First of all, you need the Avro source code. + +The easiest way is to clone or fork the GitHub mirror: + +```shell +git clone https://github.com/apache/avro.git -o github +``` + +## Making Changes + +Before you start, file an issue in [JIRA](https://issues.apache.org/jira/browse/AVRO) or discuss your ideas on the [Avro developer mailing list](http://avro.apache.org/mailing_lists.html). Describe your proposed changes and check that they fit in with what others are doing and have planned for the project. Be patient, it may take folks a while to understand your requirements. + +Modify the source code and add some (very) nice features using your favorite IDE. + +But take care about the following points + +**All Languages** +- Contributions should pass existing unit tests. +- Contributions should document public facing APIs. +- Contributions should add new tests to demonstrate bug fixes or test new features. + +**Java** + +- All public classes and methods should have informative [Javadoc comments](https://www.oracle.com/fr/technical-resources/articles/java/javadoc-tool.html). +- Do not use @author tags. +- Java code should be formatted according to [Oracle's conventions](https://www.oracle.com/java/technologies/javase/codeconventions-introduction.html), with one exception: + - Indent two spaces per level, not four. +- [JUnit](http://www.junit.org/) is our test framework: +- You must implement a class whose class name starts with Test. +- Define methods within your class and tag them with the @Test annotation. Call JUnit's many assert methods to verify conditions; these methods will be executed when you run mvn test. +- By default, do not let tests write any temporary files to /tmp. Instead, the tests should write to the location specified by the test.dir system property. +- Place your class in the src/test/java/ tree. +- You can run all the unit tests with the command mvn test, or you can run a specific unit test with the command mvn -Dtest= test (for example mvn -Dtest=TestFoo test) + + +## Code Style (Autoformatting) + +For Java code we use [Spotless](https://github.com/diffplug/spotless/) to format the code to comply with Avro's code style conventions (see above). Automatic formatting relies on [Avro's Eclipse JDT formatter definition](https://github.com/apache/avro/blob/main/lang/java/eclipse-java-formatter.xml). You can use the same definition to auto format from Eclipse or from IntelliJ configuring the Eclipse formatter plugin. + +If you use maven code styles issues are checked at the compile phase. If your code breaks because of bad formatting, you can format it automatically by running the command: +```shell +mvn spotless:apply +``` + +## Unit Tests + +Please make sure that all unit tests succeed before constructing your patch and that no new compiler warnings are introduced by your patch. Each language has its own directory and test process. + +
Java + +```shell +cd avro-trunk/lang/java +mvn clean test +``` +
+ +
Python + +```shell +cd avro-trunk/lang/py +./setup.py build test +``` +
+ +
Rust + +```shell +cd avro-trunk/lang/rust +./build.sh clean test +``` +
+ +
C# + +```shell +cd avro-trunk/lang/csharp +./build.sh clean test +``` +
+ +
C + +```shell +cd avro-trunk/lang/c +./build.sh clean +./build.sh test +``` +
+ +
C++ + +```shell +cd avro-trunk/lang/c++ +./build.sh clean test +``` +
+ +
Ruby + +```shell +cd avro-trunk/lang/ruby +gem install echoe +rake clean test +``` +
+ +
PHP + +```shell +cd avro-trunk/lang/php +./build.sh clean +./build.sh test +``` +
+ + +## Contributing your code + +Contribution can be made directly via github with a Pull Request, or via a patch. + +**Via Github** + +Method is to create a [pull request](https://help.github.com/articles/using-pull-requests/). + +On your fork, create a branch named with JIRA (avro-1234_fixNpe for example) +On source, go to it +```shell +git pull +git switch avro-1234_fixNpe +``` + +code your changes (following preceding recommendations) + +check and add updated sources +```shell +git status + +# Add any new or changed files with: +git add src/.../MyNewClass.java +git add src/.../TestMyNewClass.java +``` + +Finally, create a commit with your changes and a good log message, and push it: +```shell +git commit -m "AVRO-1234: Fix NPE by adding check to ..." +git push +``` +On your github fork site, a button will propose you to build the Pull Request. +Click on it, fill Conversation form, and create it. +Link this PR to the corresponding JIRA ticket (on JIRA ticket, add PR to "Issue Links" chapter, and add label 'pull-request-available' to it . + + + +## Jira Guidelines + +Please comment on issues in [Jira](https://issues.apache.org/jira/projects/AVRO/issues), making your concerns known. Please also vote for issues that are a high priority for you. + +Please refrain from editing descriptions and comments if possible, as edits spam the mailing list and clutter Jira's "All" display, which is otherwise very useful. Instead, preview descriptions and comments using the preview button (on the right) before posting them. Keep descriptions brief and save more elaborate proposals for comments, since descriptions are included in Jira's automatically sent messages. If you change your mind, note this in a new comment, rather than editing an older comment. The issue should preserve this history of the discussion. + +## Stay involved + +Contributors should join the Avro mailing lists. In particular, the commit list (to see changes as they are made), the dev list (to join discussions of changes) and the user list (to help others). + +## Workflow + +Building and running the site locally requires a recent extended version of Hugo. Install [Hugo](https://gohugo.io/installation/) for your environment. Once you've made your working copy of the site repo, from the repo root folder, run: + +```shell +hugo server --navigateToChanged +``` +Edit .md and .html files in content/ folder + +Once satisfied with the changes, commit them: +```shell +git commit -a +``` +Generate the HTML file stop hugo server --navigateToChanged (with Ctrl+C) and run +```shell +hugo +``` +This will generate the HTMLs in public/ folder and this is actually what is being deployed + +Add the modified HTML files to Git + +```shell +git add . +git rm offline-search-index.<>.json +git commit -a +git push +``` +This way even when the PR modifies a lot of files we can review only the first commit, the meaningful one, with the modified files in content/ folder + + +## Running a container locally +You can also run avro-website inside a Docker container, the container runs with a volume bound to the avro-website folder. This approach doesn't require you to install any dependencies other than Docker Desktop on Windows and Mac, and Docker Compose on Linux. + +Build the docker image + +```shell +docker-compose build +``` +Run the built image + ```shell +docker-compose up +``` +NOTE: You can run both commands at once with docker-compose up --build. + +Verify that the service is working. + +Open your web browser and type http://localhost:1313 in your navigation bar, This opens a local instance of the docsy-example homepage. You can now make changes to the docsy example and those changes will immediately show up in your browser after you save. + +**Cleanup** + +To stop Docker Compose, on your terminal window, press Ctrl + C. + +To remove the produced images run: + ```shell +docker-compose rm +``` + +## Troubleshooting +As you run the website locally, you may run into the following error: + ```shell +➜ hugo server + +INFO 2021/01/21 21:07:55 Using config file: +Building sites â€Ļ INFO 2021/01/21 21:07:55 syncing static files to / +Built in 288 ms +Error: Error building site: TOCSS: failed to transform "scss/main.scss" (text/x-scss): resource "scss/scss/main.scss_9fadf33d895a46083cdd64396b57ef68" not found in file cache + ``` +This error occurs if you have not installed the extended version of Hugo. See our user guide for instructions on how to install Hugo. + +## Edit content +The website content is in content/en folder. It contains .md (Markdown) and .html (HTML) files. + +**Layouts** + +To change the layout of any page edit layouts//**.html. If there is no layout for a given page at that location then copy the one provided by the theme and edit it: + ```shell + cp themes/docsy/layouts/ layouts/ + ``` +**Avro version** + +When a new version of Apache Avro is released: + +Change the value of params.avroversion in config.toml +Add a new entry to the Releases pages in the Blog section, for example: + ```shell +cp content/en/blog/releases/avro-1.10.2-released.md content/en/blog/releases/avro-1.11.0-released.md + ``` +**API documentation for C/C++/C# modules** + +The API documentations for C/C++/C# are built by their respective build.sh dist implementations. The final HTML should be copied to the external folder, for example: + ```shell +cp ../avro/build/avro-doc-1.12.0-SNAPSHOT/api/c/* content/en/docs/external/c/ + ``` + +## JIRA conventions + +Issue types: JIRA issues are categorized into different types such as bugs, improvements, new features, etc. Each issue type has a unique icon and a set of fields that are specific to that type. + +Workflow: JIRA issues follow a predefined workflow that defines the steps that an issue goes through from creation to resolution. Each step in the workflow can have its own set of conditions and actions. + +Priority: JIRA allows users to set priorities for issues to help determine the order in which they should be addressed. The priority can be set to one of five levels: Blocker, Critical, Major, Minor, and Trivial. Blocker is the highest priority and Trivial is the lowest priority. + +Labels: Labels are used to tag issues with keywords or phrases that can help with searching and filtering. + +Components: Components are used to group related issues together. For example, a software project might have components for the user interface, database, and networking. + +## See Also + +- [Apache contributor documentation](http://www.apache.org/dev/contributors.html) +- [Apache voting documentation](http://www.apache.org/foundation/voting.html) + diff --git a/doc/content/en/project/License/_index.md b/doc/content/en/project/License/_index.md new file mode 100755 index 00000000000..1840ccfc0aa --- /dev/null +++ b/doc/content/en/project/License/_index.md @@ -0,0 +1,29 @@ +--- +title: "License" +linkTitle: "License" +weight: 11 +manualLink: https://www.apache.org/licenses/ +--- + + + +Apache Avro project is licensed under [Apache Software License 2.0](https://www.apache.org/licenses/LICENSE-2.0) diff --git a/doc/content/en/project/Papers/_index.md b/doc/content/en/project/Papers/_index.md new file mode 100755 index 00000000000..beaace16894 --- /dev/null +++ b/doc/content/en/project/Papers/_index.md @@ -0,0 +1,73 @@ +--- +title: "Papers" +linkTitle: "Papers" +weight: 3 +--- + + +** ** + +**A Benchmark of JSON-compatible Binary Serialization Specifications** +Jan 9 2022, by Juan Cruz Viotti, Mital Kinderkhedia. + +https://arxiv.org/abs/2201.03051 + +** ** + +**A Survey of JSON-compatible Binary Serialization Specifications** +Jan 6 2022, by Juan Cruz Viotti, Mital Kinderkhedia. + +https://arxiv.org/abs/2201.02089 + +** ** + +**Putting Avro into Hive** +Apr 2017, by S. Sreekanth, A Sai Ram Pramodhini, Ch S Likita, Chiluka Manisha. + +https://journals.pen2print.org/index.php/ijr/article/view/7377/0 + + +** ** + +**Benchmarking Performance of Data Serialization and RPC Frameworks in Microservices Architecture: gRPC vs. Apache Thrift vs. Apache Avro** +Oct 27 2016, by Nguyen, Thuy. + +https://aaltodoc.aalto.fi/handle/123456789/23386 + +** ** + +**Apache Avro** +Sep 30 2016, by Deepak Vohra. + +https://link.springer.com/chapter/10.1007/978-1-4842-2199-0_7 + +** ** + + +**Object serialization vs relational data modelling in Apache Cassandra: a performance evaluation** +Apr 2015, by Valdemar Johansen. + +https://www.diva-portal.org/smash/get/diva2:839521/FULLTEXT02.pdf + + + + diff --git a/doc/content/en/project/Privacy policy/_index.md b/doc/content/en/project/Privacy policy/_index.md new file mode 100755 index 00000000000..0be9694d7d4 --- /dev/null +++ b/doc/content/en/project/Privacy policy/_index.md @@ -0,0 +1,29 @@ +--- +title: "Privacy policy" +linkTitle: "Privacy policy" +weight: 9 +manualLink: https://privacy.apache.org/policies/privacy-policy-public.html +--- + + + +Apache Avro project shares the same privacy policy as the [Apache Software Foundation](https://privacy.apache.org/policies/privacy-policy-public.html) diff --git a/doc/content/en/project/Security/_index.md b/doc/content/en/project/Security/_index.md new file mode 100755 index 00000000000..5346d414f22 --- /dev/null +++ b/doc/content/en/project/Security/_index.md @@ -0,0 +1,68 @@ +--- +title: "Security" +linkTitle: "Security" +weight: 10 +--- + + + +Security Policy +=============== + +Apache Avro project shares the same security policy as +the [Apache Software Foundation](https://www.apache.org/security/). + + +Security Model +============== + +The Avro library implementations are designed to read and write any data conforming +to a schema. Transport is outside the scope of the Avro library: applications using +Avro should be surrounded by security measures that prevent attackers from writing +random data and otherwise interfering with the consumers of schemas. + +Although the Avro library will not read or write data except as directed to by +invoking it, avoiding leaking data into a side channel like log files is a non-goal +security-wise for Avro. This means, for example, that you will need to catch and +handle exceptions instead of simply writing them to a log file. + +In some cases, like schema parsing, type conversions and based on explicit schema +properties, Avro can execute code provided by the environment. Avro has opt-in +mechanisms for code that is eligible for execution. Applications using Avro should +have a secured supply chain, ensuring code registered to be executed is safe. + +This supply chain also includes the schemas being used: if they are user provided, +additional validation is strongly advised. Such validation can use the parsed schema, +as schema parsing itself is safe: the parser allows SPIs, but is not otherwise +configurable. + + +Summary +------- + +In short, using Avro is safe, provided applications: + +* are surrounded by security measures that prevent attackers from writing random + data and otherwise interfering with the consumers of schemas +* avoid leaking data by, for example, catching and handling exceptions +* have a secured supply chain, ensuring code registered to be executed is safe +* if schemas are user provided, validate the parsed schema before use diff --git a/doc/content/en/project/Thanks/_index.md b/doc/content/en/project/Thanks/_index.md new file mode 100755 index 00000000000..b2ae7dff925 --- /dev/null +++ b/doc/content/en/project/Thanks/_index.md @@ -0,0 +1,29 @@ +--- +title: "Thanks" +linkTitle: "Thanks" +weight: 14 +manualLink: https://www.apache.org/foundation/thanks.html +--- + + + +Apache Avro project could not exist without the continued generous support from the community! We would like to take this opportunity to thank the ASF [Sponsors](https://www.apache.org/foundation/thanks.html). diff --git a/doc/content/en/project/_index.md b/doc/content/en/project/_index.md new file mode 100755 index 00000000000..19a78e4205d --- /dev/null +++ b/doc/content/en/project/_index.md @@ -0,0 +1,35 @@ +--- +title: "Project" +linkTitle: "Project" +weight: 1 +layout: project +menu: + main: + weight: 1 +aliases: +- /linkmap.html + +--- + + + +Apache Avro project is a member of the Apache Software Foundation! diff --git a/doc/content/en/project/pmc onboarding guide/_index.md b/doc/content/en/project/pmc onboarding guide/_index.md new file mode 100644 index 00000000000..22d7545eda8 --- /dev/null +++ b/doc/content/en/project/pmc onboarding guide/_index.md @@ -0,0 +1,34 @@ +--- +title: "PMC onboarding guide" +linkTitle: "PMC onboarding guide" +weight: 6 +--- + + + +1. Use https://whimsy.apache.org you can check that you got added to the PMC list properly +2. Validate you are in the PMC group in JIRA and the Confluence Wiki +3. Subscribe to private@avro.apache.org; you can use whimsy to do this for whatever email account you want, or send mail from that mail address to private-subscribe@ +4. You should have access also to https://reporter.apache.org which seeds our board reports +5. You can now access and read the private list archive (for linking to vote threads, etc) at https://lists.apache.org/list.html?private@avro.apache.org +6. Review the ASF PMC guides. There are a few, but you should re-read what the responsibilities are. +7. The PMC keeps a set of valuable resources in https://svn.apache.org/repos/private/pmc \ No newline at end of file diff --git a/doc/content/en/search.md b/doc/content/en/search.md new file mode 100644 index 00000000000..5ac34d9ea48 --- /dev/null +++ b/doc/content/en/search.md @@ -0,0 +1,25 @@ +--- +title: Search Results +layout: search +--- + + diff --git a/doc/examples/example.py b/doc/examples/example.py index 7b88c1cc195..f81bbe67cc4 100644 --- a/doc/examples/example.py +++ b/doc/examples/example.py @@ -16,18 +16,28 @@ # specific language governing permissions and limitations # under the License. # +from pathlib import Path + import avro.schema from avro.datafile import DataFileReader, DataFileWriter from avro.io import DatumReader, DatumWriter -schema = avro.schema.parse(open("user.avsc").read()) +# read in the schema file +schema_text = Path("user.avsc").read_text() +# then parse it +schema = avro.schema.parse(schema_text) -writer = DataFileWriter(open("/tmp/users.avro", "w"), DatumWriter(), schema) -writer.append({"name": "Alyssa", "favorite_number": 256, "WTF": 2}) -writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) -writer.close() +# create a DataFileWriter to write data to a file +users_file = Path("/tmp/users.avro") +with users_file.open("wb") as users_fh, DataFileWriter( + users_fh, DatumWriter(), schema +) as writer: + writer.append({"name": "Alyssa", "favorite_number": 256}) + writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) -reader = DataFileReader(open("/tmp/users.avro", "r"), DatumReader()) -for user in reader: - print user -reader.close() +# create a DataFileReader to read data from a file +with users_file.open("rb") as users_fh, DataFileReader( + users_fh, DatumReader() +) as reader: + for user in reader: + print(user) diff --git a/doc/examples/java-example/pom.xml b/doc/examples/java-example/pom.xml index d741dfe7a4f..2fb9ba89d36 100644 --- a/doc/examples/java-example/pom.xml +++ b/doc/examples/java-example/pom.xml @@ -26,6 +26,7 @@ java-example https://maven.apache.org + 1.12.1 UTF-8 @@ -38,7 +39,7 @@ org.apache.avro avro - 1.10.2 + ${avro.version} @@ -48,36 +49,27 @@ maven-compiler-plugin 3.8.1 - 1.8 - 1.8 + 11 + 11 org.apache.avro avro-maven-plugin - 1.10.2 + ${avro.version} + + ${project.basedir}/../ + ${project.basedir}/src/main/java/ + generate-sources schema - - ${project.basedir}/../ - ${project.basedir}/src/main/java/ - - - org.apache.maven.plugins - maven-plugin - 1.10.2 - - 1.8 - 1.8 - - @@ -92,7 +84,7 @@ org.apache.avro avro-maven-plugin - [1.10.2,) + [${avro.version},) schema diff --git a/doc/examples/mr-example/pom.xml b/doc/examples/mr-example/pom.xml index 2f64b35ec8e..089adf79764 100644 --- a/doc/examples/mr-example/pom.xml +++ b/doc/examples/mr-example/pom.xml @@ -28,6 +28,7 @@ mr-example + 1.12.1 UTF-8 @@ -38,14 +39,14 @@ maven-compiler-plugin 3.8.1 - 1.8 - 1.8 + 11 + 11 org.apache.avro avro-maven-plugin - 1.10.0 + ${avro.version} generate-sources @@ -73,7 +74,7 @@ org.apache.avro avro-maven-plugin - [1.10.0,) + [${avro.version},) schema @@ -94,12 +95,12 @@ org.apache.avro avro - 1.10.2 + ${avro.version} org.apache.avro avro-mapred - 1.10.2 + ${avro.version} org.apache.hadoop diff --git a/lang/rust/examples/to_value.rs b/doc/go.mod similarity index 76% rename from lang/rust/examples/to_value.rs rename to doc/go.mod index 622554bd054..2837284801c 100644 --- a/lang/rust/examples/to_value.rs +++ b/doc/go.mod @@ -15,15 +15,12 @@ // specific language governing permissions and limitations // under the License. -#[derive(Debug, serde::Serialize)] -struct Test { - a: i64, - b: &'static str, -} +module github.com/apache/avro -fn main() -> anyhow::Result<()> { - let test = Test { a: 27, b: "foo" }; - let value = avro_rs::to_value(test)?; - println!("{:?}", value); - Ok(()) -} +go 1.22 + +require ( + github.com/FortAwesome/Font-Awesome v0.0.0-20240402185447-c0f460dca7f7 // indirect + github.com/google/docsy v0.10.0 // indirect + github.com/twbs/bootstrap v5.3.3+incompatible // indirect +) diff --git a/doc/go.sum b/doc/go.sum new file mode 100644 index 00000000000..69719376425 --- /dev/null +++ b/doc/go.sum @@ -0,0 +1,6 @@ +github.com/FortAwesome/Font-Awesome v0.0.0-20240402185447-c0f460dca7f7 h1:2aWEKCRLqQ9nPyXaz4/IYtRrDr3PzEiX0DUSUr2/EDs= +github.com/FortAwesome/Font-Awesome v0.0.0-20240402185447-c0f460dca7f7/go.mod h1:IUgezN/MFpCDIlFezw3L8j83oeiIuYoj28Miwr/KUYo= +github.com/google/docsy v0.10.0 h1:6tMDacPwAyRWNCfvsn/9qGOZDQ8b0aRzjRZvnZPY5dg= +github.com/google/docsy v0.10.0/go.mod h1:c0nIAqmRTOuJ01F85U/wJPQtc3Zj9N58Kea9bOT2AJc= +github.com/twbs/bootstrap v5.3.3+incompatible h1:goFoqinzdHfkeegpFP7pvhbd0g+A3O2hbU3XCjuNrEQ= +github.com/twbs/bootstrap v5.3.3+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0= diff --git a/doc/layouts/404.html b/doc/layouts/404.html new file mode 100644 index 00000000000..4d5d5158a77 --- /dev/null +++ b/doc/layouts/404.html @@ -0,0 +1,29 @@ + + +{{ define "main"}} +
+
+

Not found

+

Oops! This page doesn't exist. Try going back to our home page.

+
+
+{{ end }} diff --git a/doc/layouts/partials/favicons.html b/doc/layouts/partials/favicons.html new file mode 100644 index 00000000000..7ff1b9f01c1 --- /dev/null +++ b/doc/layouts/partials/favicons.html @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + diff --git a/doc/layouts/partials/footer.html b/doc/layouts/partials/footer.html new file mode 100644 index 00000000000..0638c0074de --- /dev/null +++ b/doc/layouts/partials/footer.html @@ -0,0 +1,63 @@ + + +{{ $links := .Site.Params.links }} +
+
+
+
+ {{ with $links }} + {{ with index . "user"}} + {{ template "footer-links-block" . }} + {{ end }} + {{ end }} +
+
+ {{ with $links }} + {{ with index . "developer"}} + {{ template "footer-links-block" . }} + {{ end }} + {{ end }} +
+
+ {{ with .Site.Params }}© {{ now.Year}} {{ .copyright }} {{ T "footer_all_rights_reserved" }}{{ end }} + {{ if not .Site.Params.ui.footer_about_disable }} + {{ with .Site.GetPage "about" }}

{{ .Title }}

{{ end }} + {{ end }} +

Apache Avro, Avro™, Apache®, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.

+
+
+ {{ with .Site.Params }}{{ end }} +
+
+
+
+{{ define "footer-links-block" }} +
    + {{ range . }} +
  • + + + +
  • + {{ end }} +
+{{ end }} diff --git a/doc/layouts/partials/navbar-asf-links.html b/doc/layouts/partials/navbar-asf-links.html new file mode 100644 index 00000000000..e85b4b9275e --- /dev/null +++ b/doc/layouts/partials/navbar-asf-links.html @@ -0,0 +1,31 @@ + + + diff --git a/doc/layouts/partials/navbar-docs-selector.html b/doc/layouts/partials/navbar-docs-selector.html new file mode 100644 index 00000000000..23b87d3d2ea --- /dev/null +++ b/doc/layouts/partials/navbar-docs-selector.html @@ -0,0 +1,31 @@ + + + diff --git a/doc/layouts/partials/navbar.html b/doc/layouts/partials/navbar.html new file mode 100644 index 00000000000..12d4fa58f36 --- /dev/null +++ b/doc/layouts/partials/navbar.html @@ -0,0 +1,58 @@ + + +{{ $cover := and (.HasShortcode "blocks/cover") (not .Site.Params.ui.navbar_translucent_over_cover_disable) }} + diff --git a/doc/layouts/project/baseof.html b/doc/layouts/project/baseof.html new file mode 100644 index 00000000000..9ec1e4d1793 --- /dev/null +++ b/doc/layouts/project/baseof.html @@ -0,0 +1,53 @@ + + + + + + {{ partial "head.html" . }} + + +
+ {{ partial "navbar.html" . }} +
+
+
+
+ + +
+ {{ partial "version-banner.html" . }} + {{ if not .Site.Params.ui.breadcrumb_disable }}{{ partial "breadcrumb.html" . }}{{ end }} + {{ block "main" . }}{{ end }} +
+
+
+ {{ partial "footer.html" . }} +
+ {{ partial "scripts.html" . }} + + diff --git a/doc/layouts/project/baseof.print.html b/doc/layouts/project/baseof.print.html new file mode 100644 index 00000000000..b74e38c0e2e --- /dev/null +++ b/doc/layouts/project/baseof.print.html @@ -0,0 +1,47 @@ + + + + + + {{ partial "head.html" . }} + + +
+ {{ partial "navbar.html" . }} +
+
+
+
+
+
+
+
+
+ {{ block "main" . }}{{ end }} +
+
+
+ {{ partial "footer.html" . }} +
+ {{ partial "scripts.html" . }} + + diff --git a/doc/layouts/project/list.html b/doc/layouts/project/list.html new file mode 100644 index 00000000000..1afebe8d8bb --- /dev/null +++ b/doc/layouts/project/list.html @@ -0,0 +1,48 @@ +{{ define "main" }} + +
+

{{ .Title }}

+ {{ with .Params.description }}
{{ . | markdownify }}
{{ end }} + + {{ .Content }} + {{ partial "section-index.html" . }} + {{ if (.Site.Config.Services.Disqus.Shortname) }} +
+ {{ partial "disqus-comment.html" . }} + {{ end }} + {{ partial "page-meta-lastmod.html" . }} +
+{{ end }} diff --git a/doc/layouts/project/list.print.html b/doc/layouts/project/list.print.html new file mode 100644 index 00000000000..33fa25d9fe1 --- /dev/null +++ b/doc/layouts/project/list.print.html @@ -0,0 +1,23 @@ +{{ define "main" }} + +{{ partial "print/render" . }} +{{ end }} diff --git a/doc/layouts/project/single.html b/doc/layouts/project/single.html new file mode 100644 index 00000000000..bbc65acfe9d --- /dev/null +++ b/doc/layouts/project/single.html @@ -0,0 +1,24 @@ + + +{{ define "main" }} +{{ .Render "content" }} +{{ end }} diff --git a/doc/layouts/shortcodes/avro_version.html b/doc/layouts/shortcodes/avro_version.html new file mode 100644 index 00000000000..04a4bf5ef24 --- /dev/null +++ b/doc/layouts/shortcodes/avro_version.html @@ -0,0 +1,24 @@ +{{/* + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +*/}}{{/* + +This file must not have a trailing newline. + +*/}}{{ $.Site.Params.avroversion }} \ No newline at end of file diff --git a/doc/layouts/shortcodes/project_logo.html b/doc/layouts/shortcodes/project_logo.html new file mode 100644 index 00000000000..41ed606409b --- /dev/null +++ b/doc/layouts/shortcodes/project_logo.html @@ -0,0 +1,22 @@ +{{/* + +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. + +*/}}{{/* +This file should not have a trailing newline. +*/}}{{ with resources.Get "/images/logo-text.svg" }}{{ (.|minify).Content | safeHTML }}{{ end }} diff --git a/doc/package-lock.json b/doc/package-lock.json new file mode 100644 index 00000000000..76d325ae70d --- /dev/null +++ b/doc/package-lock.json @@ -0,0 +1,919 @@ +{ + "name": "doc", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "devDependencies": { + "autoprefixer": "^10.5.0", + "postcss": "^8.5.14", + "postcss-cli": "^11.0.1" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/anymatch": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", + "dev": true, + "license": "ISC", + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/autoprefixer": { + "version": "10.5.0", + "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.5.0.tgz", + "integrity": "sha512-FMhOoZV4+qR6aTUALKX2rEqGG+oyATvwBt9IIzVR5rMa2HRWPkxf+P+PAJLD1I/H5/II+HuZcBJYEFBpq39ong==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/autoprefixer" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "browserslist": "^4.28.2", + "caniuse-lite": "^1.0.30001787", + "fraction.js": "^5.3.4", + "picocolors": "^1.1.1", + "postcss-value-parser": "^4.2.0" + }, + "bin": { + "autoprefixer": "bin/autoprefixer" + }, + "engines": { + "node": "^10 || ^12 || >=14" + }, + "peerDependencies": { + "postcss": "^8.1.0" + } + }, + "node_modules/baseline-browser-mapping": { + "version": "2.10.20", + "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.20.tgz", + "integrity": "sha512-1AaXxEPfXT+GvTBJFuy4yXVHWJBXa4OdbIebGN/wX5DlsIkU0+wzGnd2lOzokSk51d5LUmqjgBLRLlypLUqInQ==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "baseline-browser-mapping": "dist/cli.cjs" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/binary-extensions": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", + "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/browserslist": { + "version": "4.28.2", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz", + "integrity": "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "baseline-browser-mapping": "^2.10.12", + "caniuse-lite": "^1.0.30001782", + "electron-to-chromium": "^1.5.328", + "node-releases": "^2.0.36", + "update-browserslist-db": "^1.2.3" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001788", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001788.tgz", + "integrity": "sha512-6q8HFp+lOQtcf7wBK+uEenxymVWkGKkjFpCvw5W25cmMwEDU45p1xQFBQv8JDlMMry7eNxyBaR+qxgmTUZkIRQ==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "CC-BY-4.0" + }, + "node_modules/chokidar": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", + "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", + "dev": true, + "license": "MIT", + "dependencies": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + }, + "engines": { + "node": ">= 8.10.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, + "node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "dev": true, + "license": "ISC", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, + "node_modules/dependency-graph": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/dependency-graph/-/dependency-graph-1.0.0.tgz", + "integrity": "sha512-cW3gggJ28HZ/LExwxP2B++aiKxhJXMSIt9K48FOXQkm+vuG5gyatXnLsONRJdzO/7VfjDIiaOOa/bs4l464Lwg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/electron-to-chromium": { + "version": "1.5.340", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.340.tgz", + "integrity": "sha512-908qahOGocRMinT2nM3ajCEM99H4iPdv84eagPP3FfZy/1ZGeOy2CZYzjhms81ckOPCXPlW7LkY4XpxD8r1DrA==", + "dev": true, + "license": "ISC" + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true, + "license": "MIT" + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/fraction.js": { + "version": "5.3.4", + "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.4.tgz", + "integrity": "sha512-1X1NTtiJphryn/uLQz3whtY6jK3fTqoE3ohKs0tT+Ujr1W59oopxmoEh7Lu5p6vBaPbgoM0bzveAW4Qi5RyWDQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "*" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/rawify" + } + }, + "node_modules/fs-extra": { + "version": "11.3.2", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.3.2.tgz", + "integrity": "sha512-Xr9F6z6up6Ws+NjzMCZc6WXg2YFRlrLP9NQDO3VQrWrfiojdhS56TzueT88ze0uBdCTwEIhQ3ptnmKeWGFAe0A==", + "dev": true, + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=14.14" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true, + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "dev": true, + "license": "MIT", + "dependencies": { + "binary-extensions": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/jsonfile": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.0.tgz", + "integrity": "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==", + "dev": true, + "license": "MIT", + "dependencies": { + "universalify": "^2.0.0" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, + "node_modules/lilconfig": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.3.tgz", + "integrity": "sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/antonk52" + } + }, + "node_modules/nanoid": { + "version": "3.3.11", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/node-releases": { + "version": "2.0.37", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.37.tgz", + "integrity": "sha512-1h5gKZCF+pO/o3Iqt5Jp7wc9rH3eJJ0+nh/CIoiRwjRxde/hAHyLPXYN4V3CqKAbiZPSeJFSWHmJsbkicta0Eg==", + "dev": true, + "license": "MIT" + }, + "node_modules/normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, + "license": "ISC" + }, + "node_modules/picomatch": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pify": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", + "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postcss": { + "version": "8.5.14", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.14.tgz", + "integrity": "sha512-SoSL4+OSEtR99LHFZQiJLkT59C5B1amGO1NzTwj7TT1qCUgUO6hxOvzkOYxD+vMrXBM3XJIKzokoERdqQq/Zmg==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "nanoid": "^3.3.11", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/postcss-cli": { + "version": "11.0.1", + "resolved": "https://registry.npmjs.org/postcss-cli/-/postcss-cli-11.0.1.tgz", + "integrity": "sha512-0UnkNPSayHKRe/tc2YGW6XnSqqOA9eqpiRMgRlV1S6HdGi16vwJBx7lviARzbV1HpQHqLLRH3o8vTcB0cLc+5g==", + "dev": true, + "license": "MIT", + "dependencies": { + "chokidar": "^3.3.0", + "dependency-graph": "^1.0.0", + "fs-extra": "^11.0.0", + "picocolors": "^1.0.0", + "postcss-load-config": "^5.0.0", + "postcss-reporter": "^7.0.0", + "pretty-hrtime": "^1.0.3", + "read-cache": "^1.0.0", + "slash": "^5.0.0", + "tinyglobby": "^0.2.12", + "yargs": "^17.0.0" + }, + "bin": { + "postcss": "index.js" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "postcss": "^8.0.0" + } + }, + "node_modules/postcss-load-config": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-5.1.0.tgz", + "integrity": "sha512-G5AJ+IX0aD0dygOE0yFZQ/huFFMSNneyfp0e3/bT05a8OfPC5FUoZRPfGijUdGOJNMewJiwzcHJXFafFzeKFVA==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "lilconfig": "^3.1.1", + "yaml": "^2.4.2" + }, + "engines": { + "node": ">= 18" + }, + "peerDependencies": { + "jiti": ">=1.21.0", + "postcss": ">=8.0.9", + "tsx": "^4.8.1" + }, + "peerDependenciesMeta": { + "jiti": { + "optional": true + }, + "postcss": { + "optional": true + }, + "tsx": { + "optional": true + } + } + }, + "node_modules/postcss-reporter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/postcss-reporter/-/postcss-reporter-7.1.0.tgz", + "integrity": "sha512-/eoEylGWyy6/DOiMP5lmFRdmDKThqgn7D6hP2dXKJI/0rJSO1ADFNngZfDzxL0YAxFvws+Rtpuji1YIHj4mySA==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "picocolors": "^1.0.0", + "thenby": "^1.3.4" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "postcss": "^8.1.0" + } + }, + "node_modules/postcss-value-parser": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", + "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/pretty-hrtime": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz", + "integrity": "sha512-66hKPCr+72mlfiSjlEB1+45IjXSqvVAIy6mocupoww4tBFE9R9IhwwUGoI4G++Tc9Aq+2rxOt0RFU6gPcrte0A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/read-cache": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", + "integrity": "sha512-Owdv/Ft7IjOgm/i0xvNDZ1LrRANRfew4b2prF3OWMQLxLfu3bS8FVhCsrSCMK4lR56Y9ya+AThoTpDCTxCmpRA==", + "dev": true, + "license": "MIT", + "dependencies": { + "pify": "^2.3.0" + } + }, + "node_modules/readdirp": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "dev": true, + "license": "MIT", + "dependencies": { + "picomatch": "^2.2.1" + }, + "engines": { + "node": ">=8.10.0" + } + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/slash": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-5.1.0.tgz", + "integrity": "sha512-ZA6oR3T/pEyuqwMgAKT0/hAv8oAXckzbkmR0UkUosQ+Mc4RxGoJkRmwHgHufaenlyAgE1Mxgpdcrf75y6XcnDg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/thenby": { + "version": "1.3.4", + "resolved": "https://registry.npmjs.org/thenby/-/thenby-1.3.4.tgz", + "integrity": "sha512-89Gi5raiWA3QZ4b2ePcEwswC3me9JIg+ToSgtE0JWeCynLnLxNr/f9G+xfo9K+Oj4AFdom8YNJjibIARTJmapQ==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/tinyglobby": { + "version": "0.2.15", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", + "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "fdir": "^6.5.0", + "picomatch": "^4.0.3" + }, + "engines": { + "node": ">=12.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/SuperchupuDev" + } + }, + "node_modules/tinyglobby/node_modules/fdir": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/tinyglobby/node_modules/picomatch": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 10.0.0" + } + }, + "node_modules/update-browserslist-db": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz", + "integrity": "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "escalade": "^3.2.0", + "picocolors": "^1.1.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yaml": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.1.tgz", + "integrity": "sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw==", + "dev": true, + "license": "ISC", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14.6" + } + }, + "node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "dev": true, + "license": "MIT", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=12" + } + } + } +} diff --git a/doc/package.json b/doc/package.json new file mode 100644 index 00000000000..f698348b743 --- /dev/null +++ b/doc/package.json @@ -0,0 +1,7 @@ +{ + "devDependencies": { + "autoprefixer": "^10.5.0", + "postcss": "^8.5.14", + "postcss-cli": "^11.0.1" + } +} diff --git a/doc/src/cli.xconf b/doc/src/cli.xconf deleted file mode 100644 index 85712ac715b..00000000000 --- a/doc/src/cli.xconf +++ /dev/null @@ -1,328 +0,0 @@ - - - - - - - - . - WEB-INF/cocoon.xconf - ../tmp/cocoon-work - ../site - - - - - - - - - - - - - - - index.html - - - - - - - */* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/src/content/htmldocs/canonical-completeness.html b/doc/src/content/htmldocs/canonical-completeness.html deleted file mode 100644 index 0827d57812e..00000000000 --- a/doc/src/content/htmldocs/canonical-completeness.html +++ /dev/null @@ -1,204 +0,0 @@ - - - -Completeness of "Parsing Canonical Form" - - - -

Completeness of "Parsing Canonical Form"

- -

1.0 Introduction

- -

One of the defining characteristics of Avro is that a reader is assumed to have the "same" schema used by the writer of the data the reader is reading. This assumption leads to a data format that's compact and amenable to many forms of schema evolution. However, there are nuances to defining exactly what it means for the reader to have "the same" schema used by the writer. We want to allow, for example, trivial transformations, such as the insertion of whitespace. But we can't allow transformations that change the real meaning of schemas, such as a reordering of fields in a record

- -

To clearly define what it means for a reader to have "the same" schema as a writer, the Avro specification defines Parsing Canonical Form (PCF), a set of transformations on Avro schemas that strip away irrelevencies (e.g., "doc" attributes) and normalize the JSON text (e.g., dealing with whitespace). Two schemas are defined to be "the same" as far as a reader is concerned if and only if their PCFs are textually equal.

- -

We believe that PCF is sound and complete. Soundness means that the PCF of a schema is logically equivalent to the original form, i.e., we can use the PCF in place of the original form without introducing bugs. Completeness is "maximal soundness:" if two schemas are logically equivalent, then their PFCs will be textually identical. The Avro specification claims that PCF is complete when it says: "[if two schemas have the same PCF, then] there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writing using the other original schema."

- -

We believe that the transformations that define PCF are "self-evidently" sound to people familiar with Avro. For example, fixing the order of fields in a JSON object, or eliminating irrelevant attributes like doc, or using the simple int in place of {"type":"int"} clearly don't change the meaning of a schema.

- -

Completeness, on the other hand, is much less obvious. How do we know that there aren't two logically equivalent schemas that happen to reduce to different canonical forms? All it takes is one such pair to foil our claim of completeness.

- -

In general, completeness properties like this can be tricky to prove. It turns out that, while soundness is critical to us, completeness is not. If two schemas are operationally equivalent (i.e., a reader can't tell their output apart), but we accidentally treat them as if they are different, then typically all that happens is that we'll do more work. For example, we might generate a decoder object to decode some incoming data when it turns out that we had already cached a decoder object that could do the job. This is not likely to happen often, and thus incompleteness isn't a huge problem.

- -

At the same time, if we knew that our canonical forms were complete, then we might take advantage of that fact in some circumstances (e.g., to serialize schemas). Also, the Schema.equals(Object) method provided in the Avro implementation makes many of the same assumptions made in the PCF definition. Thus, a completeness proof for our canonicalization would give us confidence in the correctness of this equality algorithm. So this issue is not entirely academic.

- -

We haven't worked out a full, formal proof (we hope someone from the community will step up to that task!). However, we've been thinking about it quite a bit, and we thought we'd share our thoughts so far.

- - -

2.0 Completeness argument for Parsing Canonical Form

- -

Our formalization of Avro schemas would be based on interpreting them as grammars. In this interpretation, Avro schemas are grammars that generate tagged data streams. Consider, for example, the following schema for a linked-list: -

-  {"type":"record", "name":"list", "fields":[
-     {"name":"value", "type":"int"},
-     {"name":"tail",  "type":["null", "list"]}
-   ]}
-
-Interpreted as a grammar, it can generate a tagged data-stream that looks like this: -
-  [record,"list"][field,"value"][int,10][field,"tail"][union,1]
-    [record,"list"][field,"value"][int,22][field,"tail"][union,0]
-
-(this is a two-record linked list whose first cell contains the value "10" and second cell the value "22"). Avro schemas can trivially be interpreted as grammars for such tagged data streams. Formal proofs involving Avro schemas can be carried out as proofs about languages and grammars.

- -

So what does it mean for the canonical form of a schema to be "complete?" Let L(S) denote the language generated by the Avro schema S, and C(S) denote the canonical form of the schema. The canonicalization is complete if: -

-For all schemas S1 and S2,
-    L(S1) = L(S2) ⇒ C(S1) = C(S2) -
-That is, for any two schemas that generate the same language, their canonicalizations are textually equivalent. - -

To prove this, we need to define some functions: -

-J is a variable name we often use to denote a JSON expression representing an Avro schema
-C(J) is the Parsing Canonical Form of J as defined in the Avro specification
-P(J) is the ASG for an Avro schema generated by parsing J (think of P(J) as a Schema Java object)
-S is a variable name we often use to denote such ASGs
-L(S) is the language generated by a schema ASG -
-

With all these symbols defined, our completeness criteria is now rendered as: -

-∀ J1, J2: -L(P(J1)) = L(P(J2)) ⇒ C(J1) = C(J2) -
-We'll prove this by breaking it into two parts: -
-(1): ∀ S1, S2: -L(S1) = L(S2) ⇒ S1 ≅ S2
-(2): ∀ J1, J2: -P(J1) ≅ P(J2) ⇒ C(J1) = C(J2) -
-
-In this two-step decomposition, we've introduced a new operator ≅, which compares the ASGs of two Avro schemas. The ASG of an Avro schema can be viewed as a rooted, labeled, directed graph. Because Avro schemas can be recursive, these graphs can be cyclic. The ≅ operator is "true" between two ASGs when the set of minimal labeled paths (no cycles, starting from the root) on the two ASGs are the same. (The Schema.equals(Object) method in the Avro implementation computes something close to this ≅ relation, except that ≅ ignores "irrelevant" attributes like doc and aliases.) - -

It turns out that, implicit in the Avro Specification, there are "canonicalization" rules that are important to our proof of completeness. In particular, the Avro Specification says that a name must be defined "before" it is used, and that a name cannot be defined more than once in a schema. Consider the following redefinition of the linked-list schema, for example: -

-  {"type":"record", "name":"list", "fields":[
-    {"name":"value", "type":"int"},
-    {"name":"tail",
-      "type":["null", {"type":"record", "name":"list", "fields":[
-                        {"name":"value", "type":"int"},
-                        {"name":"tail", "type":["null", "list"]}]}]}
-  ]}
-
-In this redefinition, we've "unpacked" the recursion in the linked list by one level. In some sense, this is a perfectly fine definition of a linked list, and is operationally equivalent to the more compact version given earlier. So it makes sense that our claim of completeness is dependent upon this kind of "unpacking" not occuring in real schemas.

- -

To deal with this issue in our proof, we pretend that the Avro specification does not require that named schemas be defined just once, and be defined "before" they are used. Rather, we treat this requirement as an additional transformation rule in the definition of Parsing Canonical Form: -

    -
  • [MINIMIZE] Eliminate redundant definitions of named types (records, enums, and fixeds). That is, for each named type, have a defining instance that appears at first use, and then use just the name (rather than the full schema) everywhere else.
  • -
-(As in the Avro spec, "first use" is defined as the first occurrence in a depth-first, left-to-right traversal of the schema abstract-syntax graph (ASG).) - -

Getting back to the proof of (1) and (2) from above, we need to introduce more functions: -

-P(J)=PA(PJ(J)) - decompose parser into:
-  PJ is the JSON parser
-  PA is the Avro parser (takes JSON ASTs as input)
-C(J)=CJ(CA(CM(J))) - decompose canonicalization into:
-  CM(J) the MINIMIZE step
-  CA(J) Avro normalizations
-  CJ(J) JSON normalizations
-M(S) is the "named-schema NFA minimzation" of S
-
-"Named-schema NFA minimization" is similar to general NFA minimization, except that we only collapse nodes and edges related to named schema entities and not other nodes. For example, we would not collapse the nodes associated with int or union schemas. - -

Our proof of (1) looks like this (this proof refers to lemmas (3) and (4), which are defined later): -

- - - - - - - - - - -
S1,S2:L(S1)=L(S2)
M(S1)=M(S2)by (3)
S1≅S2by (4)
-
-Here's the proof of (2) (this proof refers to lemmas (4)-(7), which are defined later): -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
J1,J2:P(J1)≅P(J2)
M(P(J1))=M(P(J2))by (4)
P(CM(J1))=P(CM(J2))by (5)
PA(PJ(CM(J1)))=PA(PJ(CM(J2)))by definition of P
PJ(CA(CM(J1)))=PJ(CA(CM(J2)))by (6)
CJ(CA(CM(J1)))=CJ(CA(CM(J2)))by (7)
C(J1)=C(J2)by definition of C
-
- -Here are the lemmas needed above: -
-(3): ∀ S1, S2: -L(S1) = L(S2) ⇒ M(S1) = M(S2)
- -(4): ∀ S1, S2: -M(S1) = M(S2) ⇔ S1 ≅ S2
- -(5): ∀ J: M(P(J)) = P(CM(J))
- -(6): ∀ J1, J2: -PA(PJ(J1)) = PA(PJ(J2)) ⇒ PJ(CA(J1)) = PJ(CA(J2))
- -(7): ∀ J1, J2: -PJ(J1) = PJ(J2) ⇒ CJ(J1) = CJ(J2)
-
- -

Proving the lemmas: -

    -
  1. This says that the language-related part of our canonicalization is complete, i.e., M finds the equivalence-classes of L. I would imagine one could prove this by modifying a proof that the equality of LL(1) grammars is a decidable problem. I haven't gotten very far in showing this, however. -
  2. The right-hand direction of this follows from the definition of minimization. The left-hand direction seems correct, but I'm not sure how to prove it (I think it also follows from the definition of minimization). -
  3. This is showing that the MINIMIZE step (which is done on JSON expressions) is equivalent to doing an named-schema NFA minimization on the ASG representation. This should follow pretty directly from a detailed definition of M, if we provided one. -
  4. This says that the Avro-related part of our canonicalization is complete, i.e., that CA finds equivalence-classes of PA. -
  5. This says that the JSON-related part of our canonicalization is complete, i.e., that CJ finds equivalence-classes of PJ. Note that, implicitly, this lemma ranges over only JSON expressions that are legal Avro schemas with no doc strings or default values, and thus (for example) doesn't need to worry about normalization of floating-point literals. -
- - -

3.0 Concluding remarks

- -Engineers have a history of running ahead of formal mathematical proofs, when things "seem correct" to them. In this case, it seems pretty obvious that Parsing Canonical Form is complete as well as sound, and we should go ahead and treat it as such. At the same time, formal proofs often turn up corner cases and exceptions that are valuable to document and account for. Thus, it'd nice if someone could provide a better completeness argument than we've been able to so far. - - - diff --git a/doc/src/content/htmldocs/performance-testing.html b/doc/src/content/htmldocs/performance-testing.html deleted file mode 100644 index d98992e4118..00000000000 --- a/doc/src/content/htmldocs/performance-testing.html +++ /dev/null @@ -1,173 +0,0 @@ - - - -Testing performance improvements - - - - -(Note: This document pertains only to the Java implementation Avro.) - - -

1.0 Introduction

- -

Recent work on improving the performance of "specific record" (AVRO-2090 and AVRO-2247 has highlighted the need for a benchmark that can be used to test the validity of alleged performance "improvements."

- -

As a starting point, the Avro project has class called Perf (in the test source of the ipc subproject). Perf is a command-line tool contains close to 70 performance individual performance tests. These tests include tests for reading and writing primitive values, arrays and maps, plus tests for reading and writing records through all of the APIs (generic, specific, reflect).

- -

When using Perf for some recent performance work, we encountered two problems. First, because it depends on build artifacts from across the Avro project, it can be tricky to invoke. Second, and more seriously, independent runs of the tests in Perf can vary in performance by as much as 40%. While typical variance is less than that, the variance is high enough that it makes it impossible to tell if a change in performance is simply this noise, or can be properly attributed to a proposed optimization.

- -

This document addresses both problems, the usability problem in Section 2 and the variability issue in Section 3. Regarding the variability issue, as you will see, we haven't really been able to manage it in a fundamental manner. As suggested by Zoltan Frakas, we should look into porting Perf over to using the Java Microbenchmark Harness (JMH).

- - -

2.0 Invoking Perf

- -

2.1 Simple invocation

- -

Here is the easiest way we found to directly invoke Perf.

- -

As mentioned in the Introduction, Perf is dependent upon build artifacts from some of the other Avro subprojects. When you invoke Perf, it should be invoked with your most recent build of those artifacts (assuming you're performance-testing your current work). We have found that the easiest way to ensure the proper artifacts are used is to use Maven to invoke Perf.

- -

The recipe for using Maven in this way is simple. First, from the lang/java directoy, you need to build and install Avro:

- -

    mvn clean install

- -

(You can add -DskipTests to the above command line if you don't need to run test suite.) When this is done, you need to change your working directory to the lang/java/ipc directory. From there, you can invoke Perf with the following command line:

- -

-    mvn exec:java -Dexec.classpathScope=test -Dexec.mainClass=org.apache.avro.io.Perf -Dexec.args="..." -

- -

The exec.args string contains the arguments you want to pass through to the Perf.main function.

- -

To speed up your edit-compile-test loop, you can do a selective build of Avro in addition to skipping tests: - -

    mvn clean && mvn -pl "avro,compiler,maven-plugin,ipc" install -DskipTests

- - - -

2.2 Using the run-perf.sh script

- -

If you're using Perf, chances are that you want to compare the performance of a proposed optimization against the performance of a baseline (that baseline most likely being the current master branch of Avro). Generating this comparative data can be tedious if you're running Perf by hand. To relieve this tedium, you can use the run-perf.sh script instead (found in the share/test directory from the Avro top-level directory).

- -

To use this script, you put different implementations of Avro onto different branches of your Avro git repository. One of these branches is designated the "baseline" branch and the others are the "treatment" branches. The script will run the baseline and all the treatments, and will compare generate a CSV file containing a comparison of the treatments against the baseline.

- -

Running run-perf.sh --help will output a detailed manual-page for this script. Appendix A of this document contains sample invocations of this test script for different use cases.

- -

NOTE: as mentioned in run-perf.sh --help, this script is designed to be run from the lang/java/ipc directory, which is the Maven project containing the Perf program.

- - - -

3.0 Managing variance

- -As mentioned in the introduction, we tried a number of different mechanisms to reduce variance, including: -
    -
  • Varying org.apache.avro.io.perf.count, org.apache.io.perf.cycles, and org.apache.avro.io.perf.use-direct, as well as the number of times we run Perf.java within a single "run" of a test. - -

  • Taking the minimum times across runs, rather than the maximum times, using the second or third run as a baseline rather than the first, using statistical methods to eliminate outlying values. - -

  • Modified the code slightly, for example: starting the timer of a cycle after, rather than before, encoders or decoders are constructed; cacheing encoders and decoders; and reusing record objects during read tests rather than construct new ones for each record being read. - -

  • Using Docker's --cpuset-cpus flag to force the tests onto a single core. - -

  • Using a dedicated EC2 instance (c5d.2xlarge). -
-Of the above, the only change that made a significant difference was the last: in going from a laptop and desktop computer to a dedicated EC2 instances, we went from over 70 tests (out of 200) with a variance of 5% or more between runs to 35. As mentioned in the introduction, we should switch to a framework like JMH to attack this problem more fundamentally. - -

If you want to setup your own EC2 instance for testing, here's how we did it. We launched a dedicated EC2 c5d.2xlarge instance from the AWS console, using the "Amazon Linux 64-bit HVM GP2" AMI. We logged into this instance and ran the following commands to install Docker and Git (we did all our Avro build and testing inside the Docker image): -

-  sudo yum update
-  sudo yum install -y git-all
-  git config --global user.name "Your Name"
-  git config --global user.email email-address-used@github.com
-  git config --global core.editor emacs
-  sudo install -y docker
-  sudo usermod -aG docker ec2-user ## Need to log back in for this to take effect
-  sudo service docker start
-
-At this point you can checkout Avro and launch your Docker container: -
-  git clone https://github.com/apache/avro.git
-  cd avro
-  screen
-  ./build.sh docker --args "--cpuset-cpus 2,6"
-
-Note the use of screen here: executions of run-perf.sh can take a few hours, depending on the configuration. By running it inside of screen, you are protected from an SSH disconnection causing run-perf.sh to prematurely terminate. - -

The --args flag in the last command deserves some explanation. In general, the --args allows you to pass additional arguments to the docker run command executed inside build.sh. In this case, the --cpuset-cpus flag for docker tells docker to schedule the contianer exclusively on the listed (virtual) CPUs. We identified vCPUs 2 and 6 using the lscpu Linux command: -

-  [ec2-user@ip-0-0-0-0 avro]$ lscpu --extended
-  CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE
-  0   0    0      0    0:0:0:0       yes
-  1   0    0      1    1:1:1:0       yes
-  2   0    0      2    2:2:2:0       yes
-  3   0    0      3    3:3:3:0       yes
-  4   0    0      0    0:0:0:0       yes
-  5   0    0      1    1:1:1:0       yes
-  6   0    0      2    2:2:2:0       yes
-  7   0    0      3    3:3:3:0       yes
-
-Notice that (v)CPUs 2 and 6 are both on core 2: it's sufficient to schedule the container on the same core, vs a single vCPU. One final tip: to confirm that your container is running on the expected CPUs, run top and then press the 1 key -- this will show you the load on each individual CPU. - - -

Appendix A: Sample uses of run-perf.sh

- -

A detailed explanation of run-perf.sh is printed when you give it the --help flag. To help you more quickly understand how to use run-perf.sh we present here a few examples of how we used it in our recent testing efforts. - -

To summarize, you invoke it as follows: -

-    ../../../share/test/run-perf.sh [--out-dir D] \
-       [--perf-args STRING] [-Dkey=value]* [--] \
-       [-Dkey=value]* branch_baseline[:name_baseline_run] \
-       [-Dkey=value]* branch_1[:name_treatment_run_1] \
-       ... 
- [-Dkey=value]* branch_n[:name_treatment_run_n]
-
-The path given here is relative to the lang/java/ipc directory, which needs to be the current working directory when calling this script. The script executes multiple runs of testing. The first run is called the baseline run, the subsequent runs are the treatment runs. Each run consists of four identical executions of Perf.java. The running times for each Perf.java test are averaged to obtain the final running time for the test. For each treatment run, the final running times for each test are compared, as a percentage, to the running time for the test in the baseline run. These percentages are output in the file summary.csv. - -

The following invocation is what we used to measure the variance of Perf.java: -

-../../../share/test/run-perf.sh --out-dir ~/calibration \
-    -Dorg.apache.avro.specific.use_custom_coders=true \
-    AVRO-2269:baseline AVRO-2269:run1 AVRO-2269:run2 AVRO-2269:run3
-
-In this invocation, the baseline run and all three treatment runs come from the same Git branch: AVRO-2269. We need to give a name to each run: in this case runs have been named "baseline"--the baseline run--and "run1", "run2", and "run3"--the treatment runs. Note that the name of the Git branch to be used for a run must always be provided, but the name for the run itself (e.g., "baseline") is optional. If a name for the run is not provided, then the name of the Git branch will be used as the name of the run. However, each run must have a unique name, so in this example we had to explicitly name the branches since all runs are on the same branch. - -

run-perf.sh uses Maven to invoke Perf.java. The -D flag is used to pass system properties to Maven, which in turn will pass them through to Perf.java. In the example above, we use this flag to turn on the custom-coders feature recently checked into Avro. Note that initial -D flags will be passed to all runs, while -D switches that come just before the name of Git branch of a run apply to only that run. In the case of the baseline run, which comes first, if you want to pass -D flags to just that run, then use the -- flag to indicate that all global parameters for run-perf.sh have been provided, followed by the -D flags you want to pass to only the baseline run. - -

Finally, note that run-perf.sh generates a lot of intermediate files as well as the final summary.csv file. Thus, it is recommended that the output of each execution of run-pref.sh is sent to a dedicated directory, provided by the --out-dir flag. If that directory does not exist, it will be created. (Observe that run-perf.sh outputs a file called command.txt containing the full command-line used to invoke it. This can be helpful if you run a lot of experiments and forget the detailed setup of some of them along the way.) - -

The next invocation is what we used to ensure that the new "custom coders" optimization for specific records does indeed improve performance: -

-../../../share/test/run-perf.sh --out-dir ~/retest-codegen \
-    --perf-args "-Sf" \
-    AVRO-2269:baseline \
-    -Dorg.apache.avro.specific.use_custom_coders=true AVRO-2269:custom-coders
-
-In this case, unlike the previous one, the -D flag that turns on the use of custom coders is applied specifically to the treatment run, and not globally. Also, since this flag only affects the Specific Record case, we use the --perf-args flag to pass additional arguments to Perf.java; in this case, the -Sf flag tells Perf.java to run just the specific-record related tests and not the entire test suite. - -

This last example shows how we checked the performance impact of two new feature-branches we've been developing: -

-../../../share/test/run-perf.sh --out-dir ~/new-branches \
-    -Dorg.apache.avro.specific.use_custom_coders=true \
-    AVRO-2269:baseline combined-opts full-refactor
-
-In this case, once again, we turn on custom-coders for all runs. In this case, again, the Git branch AVRO-2269 is used for our baseline run. However, in this case, the treatment runs come from two other Git branches: combined-opts and full-refactor. We didn't provide run-names for these runs because the Git branch-name were fine to be used as run names (we explicitly named the first run "baseline" not because we had to, but because we like the convention of using that name). - -

Although we didn't state it before, in preparing for a run, run-perf.sh will checkout the Git branch to be used for the run and use mvn install to build and install it. It does this for each branch, so the invocation just given will checkout and build three different branches during its overall execution. (As an optimization, if one run uses the same branch as the previous run, then the branch is not checked-out or rebuilt between runs.) - - - diff --git a/doc/src/content/mddocs/refactoring-resolution.md b/doc/src/content/mddocs/refactoring-resolution.md deleted file mode 100644 index 860f5c802db..00000000000 --- a/doc/src/content/mddocs/refactoring-resolution.md +++ /dev/null @@ -1,143 +0,0 @@ - - -# Refactoring Resolution -by Raymie Stata - - -## Problem statement - -In the early days of Avro, Schema resolution was implemented in a -number of places, e.g., `GenericDatumReader` as well as -`ResolvingGrammarGenerator`. However, Schema resolution is -complicated and thus error prone. Multiple implementations were hard -to maintain, both for correctness and for updates to the -schema-resolution spec. - -To address the problems of multiple implementations, we converged on -the implementation found in `ResolvingGrammarGenerator` (together with -`ResolvingDecoder`) as the single implementation, and refactored other -parts of Avro to depend on this implementation. - -Converging on a single implementation solved the maintenance problem, -and has served well for a number of years. However, the logic in -`ResolvingGrammarGenerator` does _two_ things: it contains the logic -for _schema resolution_ itself, and it contains the logic for -embedding that logic into a grammar that can be used by -`ResolvingDecoder`. - -Recently, Avro contributors have wanted access to the logic of schema -resolution _apart from_ `ResolvingDecoder`. For example, -[AVRO-2247](https://issues.apache.org/jira/browse/AVRO-2247) proposes -a new, faster approach to implementing `DatumReaders`. The initial -implementation of AVRO-2247 was forced to reimplement Schema -resolution -- going back to the world of multiple implementations -- -because there isn't a reusable implementation of our resolution logic. - -Similarly, as I've been working on extending the performance -improvements of -[AVRO-2090](https://issues.apache.org/jira/browse/AVRO-2090) when -writing data, I've been thinking about the possibilities of dynamic -code generation. Here too, I can't reuse `ResolvingGrammarGenerator`, -which would force me to reimplement the schema-resolution logic. - - -## Proposed solution - -We introduce a new class to encapsulate the logic of schema resolution -independent from the logic of implementing schema resolution as a -`ResolvingDecoder` grammar. In particular, we introduce a new class -`org.apache.avro.Resolver` with the following key function: - - public static Resolver.Action resolve(Schema writer, Schema reader); - -The subclasses of `Resolver.Action` encapsulate various ways to -resolve schemas. The `resolve` function walks the reader's and -writer's schema parse trees together, and generate a tree of -`Resolver.Action` nodes indicating how to resolve each subtree of the -writer's schema into the corresponding subtree of the reader's. - -`Resolve.Action` has the following subclasses: - - * `DoNothing` -- nothing needs to be done to resolve the writer's - data into the reader's schema. That is, the reader should read - the data written by the writer as if it were written using the - reader's own schema. This can be generated for any kind of - schema -- for example, if the reader's and writer's schemas are - the exact same union schema, a `DoNothing` will be generated -- - so consumers of `Resolver` need to be able to handle `DoNothing` - for all schemas. - - * `Promote` -- the writer's value needs to be promoted to the - reader's schema. Generated only for numeric and byte/string - types. - - * `ContainerAction` -- no resolution is needed directly on - container schemas, but a `ContainerAction` contains the `Action` - needed for the contained schema - - * `EnumAdjust` -- resolution involves dealing with reordering of - symbols and symbols that have been removed from the enumeration. - An `EnumAdjust` object contains the information needed to do so. - - * `RecordAdjust` -- resolution involves recursively resolving the - schemas for each field, and dealing with reordering and removal - of fields. A `RecordAdjust` object contains the information - needed to do so. - - * `SkipAction` -- only generated as a sub-action of a - `RecordAdjust` action. Used to indicate that a writer's field - does not appear in the reader's schema and thus should be - skipped. - - * `WriterUnion` -- generated when the writer's schema is a union - and the reader's schema is not the identical union. Has - subactions for resolving each branch of the writer's union - against the reader's schema. - - * `ReaderUnion` -- generated when the reader's schema is a union - and the writer's was not. Had information indicating which of - the reader's union-branch was the best fit for the writer's - schema, and a subaction for resolving the schema of that branch - against the writer's schema. - - * `ErrorAction` -- generated when the (sub)schemas can't be - resolved. - -These new classes aresimilar to the family of `Symbol` objects we've -defined for `ResolvingGrammarGenerator`. For example, -`Action.RecordAdjust` is similar to `Symbol.FieldOrderAction`, and -`Action.EnumAdjust` in `Symbol.EnumAdjustAction`. This similarity is -not surprising, since those `Symbol` objects were design to -encapsulate the logic of schema resolution as well. - -However, where `ResolvingGrammarGenerator` embeds those `Symbol` -objects into flattened productions highly optimized for the LL(1) -parser implemented by `ResolvingDecoder`. The `Resolver`, in -contrast, captures the schema-resolution logic in a tree-like -structure that closely mirrors the syntax-tree of the schemas being -resolved. This tree-like representation is easily consumed by -multiple implementations of resolution -- be it the grammar-based -implementation of `ResolvingDecoder`, the "action-sequence"-based -implementation of AVRO-2247, or the dynamic code-gen implementation -being considered as an extension to AVRO-2090. - -We have reimplemented `ResolvingGrammarGenerator` to eliminate it's -implementaiton of schema-resolution logic and instead consume the -output of `Resolver.resolve`. Thus, it might be helpful to study -`ResolvingGrammarGenerator` to better understand how to consume this -output in other circumstances. diff --git a/doc/src/content/xdocs/gettingstartedjava.xml b/doc/src/content/xdocs/gettingstartedjava.xml deleted file mode 100644 index 5440b07efe4..00000000000 --- a/doc/src/content/xdocs/gettingstartedjava.xml +++ /dev/null @@ -1,527 +0,0 @@ - - - - %avro-entities; -]> - -

- Apache Avro™ &AvroVersion; Getting Started (Java) -
- -

- This is a short guide for getting started with Apache Avro™ using - Java. This guide only covers using Avro for data serialization; see - Patrick Hunt's Avro - RPC Quick Start for a good introduction to using Avro for RPC. -

-
- Download -

- Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be - downloaded from the Apache Avro™ - Releases page. This guide uses Avro &AvroVersion;, the latest - version at the time of writing. For the examples in this guide, - download avro-&AvroVersion;.jar and - avro-tools-&AvroVersion;.jar. -

-

- Alternatively, if you are using Maven, add the following dependency to - your POM: -

- -<dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - <version>&AvroVersion;</version> -</dependency> - -

- As well as the Avro Maven plugin (for performing code generation): -

- -<plugin> - <groupId>org.apache.avro</groupId> - <artifactId>avro-maven-plugin</artifactId> - <version>&AvroVersion;</version> - <executions> - <execution> - <phase>generate-sources</phase> - <goals> - <goal>schema</goal> - </goals> - <configuration> - <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory> - <outputDirectory>${project.basedir}/src/main/java/</outputDirectory> - </configuration> - </execution> - </executions> -</plugin> -<plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-compiler-plugin</artifactId> - <configuration> - <source>1.8</source> - <target>1.8</target> - </configuration> -</plugin> - -

- You may also build the required Avro jars from source. Building Avro is - beyond the scope of this guide; see the Build - Documentation page in the wiki for more information. -

-
- -
- Defining a schema -

- Avro schemas are defined using JSON. Schemas are composed of primitive types - (null, boolean, int, - long, float, double, - bytes, and string) and complex types (record, - enum, array, map, - union, and fixed). You can learn more about - Avro schemas and types from the specification, but for now let's start - with a simple schema example, user.avsc: -

- -{"namespace": "example.avro", - "type": "record", - "name": "User", - "fields": [ - {"name": "name", "type": "string"}, - {"name": "favorite_number", "type": ["int", "null"]}, - {"name": "favorite_color", "type": ["string", "null"]} - ] -} - -

- This schema defines a record representing a hypothetical user. (Note - that a schema file can only contain a single schema definition.) At - minimum, a record definition must include its type ("type": - "record"), a name ("name": "User"), and fields, in - this case name, favorite_number, and - favorite_color. We also define a namespace - ("namespace": "example.avro"), which together with the name - attribute defines the "full name" of the schema - (example.avro.User in this case). - -

-

- Fields are defined via an array of objects, each of which defines a name - and type (other attributes are optional, see the record specification for more - details). The type attribute of a field is another schema object, which - can be either a primitive or complex type. For example, the - name field of our User schema is the primitive type - string, whereas the favorite_number and - favorite_color fields are both unions, - represented by JSON arrays. unions are a complex type that - can be any of the types listed in the array; e.g., - favorite_number can either be an int or - null, essentially making it an optional field. -

-
- -
- Serializing and deserializing with code generation -
- Compiling the schema -

- Code generation allows us to automatically create classes based on our - previously-defined schema. Once we have defined the relevant classes, - there is no need to use the schema directly in our programs. We use the - avro-tools jar to generate code as follows: -

- -java -jar /path/to/avro-tools-&AvroVersion;.jar compile schema <schema file> <destination> - -

- This will generate the appropriate source files in a package based on - the schema's namespace in the provided destination folder. For - instance, to generate a User class in package - example.avro from the schema defined above, run -

- -java -jar /path/to/avro-tools-&AvroVersion;.jar compile schema user.avsc . - -

- Note that if you using the Avro Maven plugin, there is no need to - manually invoke the schema compiler; the plugin automatically - performs code generation on any .avsc files present in the configured - source directory. -

-
-
- Creating Users -

- Now that we've completed the code generation, let's create some - Users, serialize them to a data file on disk, and then - read back the file and deserialize the User objects. -

-

- First let's create some Users and set their fields. -

- -User user1 = new User(); -user1.setName("Alyssa"); -user1.setFavoriteNumber(256); -// Leave favorite color null - -// Alternate constructor -User user2 = new User("Ben", 7, "red"); - -// Construct via builder -User user3 = User.newBuilder() - .setName("Charlie") - .setFavoriteColor("blue") - .setFavoriteNumber(null) - .build(); - -

- As shown in this example, Avro objects can be created either by - invoking a constructor directly or by using a builder. Unlike - constructors, builders will automatically set any default values - specified in the schema. Additionally, builders validate the data as - it set, whereas objects constructed directly will not cause an error - until the object is serialized. However, using constructors directly - generally offers better performance, as builders create a copy of the - datastructure before it is written. -

-

- Note that we do not set user1's favorite color. Since - that record is of type ["string", "null"], we can either - set it to a string or leave it null; it is - essentially optional. Similarly, we set user3's favorite - number to null (using a builder requires setting all fields, even if - they are null). -

-
-
- Serializing -

- Now let's serialize our Users to disk. -

- -// Serialize user1, user2 and user3 to disk -DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class); -DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter); -dataFileWriter.create(user1.getSchema(), new File("users.avro")); -dataFileWriter.append(user1); -dataFileWriter.append(user2); -dataFileWriter.append(user3); -dataFileWriter.close(); - -

- We create a DatumWriter, which converts Java objects into - an in-memory serialized format. The SpecificDatumWriter - class is used with generated classes and extracts the schema from the - specified generated type. -

-

- Next we create a DataFileWriter, which writes the - serialized records, as well as the schema, to the file specified in the - dataFileWriter.create call. We write our users to the file - via calls to the dataFileWriter.append method. When we are - done writing, we close the data file. -

-
-
- Deserializing -

- Finally, let's deserialize the data file we just created. -

- -// Deserialize Users from disk -DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class); -DataFileReader<User> dataFileReader = new DataFileReader<User>(file, userDatumReader); -User user = null; -while (dataFileReader.hasNext()) { -// Reuse user object by passing it to next(). This saves us from -// allocating and garbage collecting many objects for files with -// many items. -user = dataFileReader.next(user); -System.out.println(user); -} - -

- This snippet will output: -

- -{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} -{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} -{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"} - -

- Deserializing is very similar to serializing. We create a - SpecificDatumReader, analogous to the - SpecificDatumWriter we used in serialization, which - converts in-memory serialized items into instances of our generated - class, in this case User. We pass the - DatumReader and the previously created File - to a DataFileReader, analogous to the - DataFileWriter, which reads both the schema used by the - writer as well as the data from the file on disk. The data will be - read using the writer's schema included in the file and the - schema provided by the reader, in this case the User - class. The writer's schema is needed to know the order in which - fields were written, while the reader's schema is needed to know what - fields are expected and how to fill in default values for fields - added since the file was written. If there are differences between - the two schemas, they are resolved according to the - Schema Resolution - specification. -

-

- Next we use the DataFileReader to iterate through the - serialized Users and print the deserialized object to - stdout. Note how we perform the iteration: we create a single - User object which we store the current deserialized user - in, and pass this record object to every call of - dataFileReader.next. This is a performance optimization - that allows the DataFileReader to reuse the same - User object rather than allocating a new - User for every iteration, which can be very expensive in - terms of object allocation and garbage collection if we deserialize a - large data file. While this technique is the standard way to iterate - through a data file, it's also possible to use for (User user : - dataFileReader) if performance is not a concern. -

-
-
- Compiling and running the example code -

- This example code is included as a Maven project in the - examples/java-example directory in the Avro docs. From this - directory, execute the following commands to build and run the - example: -

- -$ mvn compile # includes code generation via Avro Maven plugin -$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain - -
-
- Beta feature: Generating faster code -

- In this release we have introduced a new approach to - generating code that speeds up decoding of objects by more - than 10% and encoding by more than 30% (future performance - enhancements are underway). To ensure a smooth introduction - of this change into production systems, this feature is - controlled by a feature flag, the system - property org.apache.avro.specific.use_custom_coders. - In this first release, this feature is off by default. To - turn it on, set the system flag to true at - runtime. In the sample above, for example, you could enable - the fater coders as follows: -

- -$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \ - -Dorg.apache.avro.specific.use_custom_coders=true - -

- Note that you do not have to recompile your Avro - schema to have access to this feature. The feature is - compiled and built into your code, and you turn it on and - off at runtime using the feature flag. As a result, you can - turn it on during testing, for example, and then off in - production. Or you can turn it on in production, and - quickly turn it off if something breaks. -

-

- We encourage the Avro community to exercise this new feature - early to help build confidence. (For those paying - one-demand for compute resources in the cloud, it can lead - to meaningful cost savings.) As confidence builds, we will - turn this feature on by default, and eventually eliminate - the feature flag (and the old code). -

-
-
- -
- Serializing and deserializing without code generation -

- Data in Avro is always stored with its corresponding schema, meaning we - can always read a serialized item regardless of whether we know the - schema ahead of time. This allows us to perform serialization and - deserialization without code generation. -

-

- Let's go over the same example as in the previous section, but without - using code generation: we'll create some users, serialize them to a data - file on disk, and then read back the file and deserialize the users - objects. -

-
- Creating users -

- First, we use a Parser to read our schema definition and - create a Schema object. -

- -Schema schema = new Schema.Parser().parse(new File("user.avsc")); - -

- Using this schema, let's create some users. -

- -GenericRecord user1 = new GenericData.Record(schema); -user1.put("name", "Alyssa"); -user1.put("favorite_number", 256); -// Leave favorite color null - -GenericRecord user2 = new GenericData.Record(schema); -user2.put("name", "Ben"); -user2.put("favorite_number", 7); -user2.put("favorite_color", "red"); - -

- Since we're not using code generation, we use - GenericRecords to represent users. - GenericRecord uses the schema to verify that we only - specify valid fields. If we try to set a non-existent field (e.g., - user1.put("favorite_animal", "cat")), we'll get an - AvroRuntimeException when we run the program. -

-

- Note that we do not set user1's favorite color. Since - that record is of type ["string", "null"], we can either - set it to a string or leave it null; it is - essentially optional. -

-
-
- Serializing -

- Now that we've created our user objects, serializing and deserializing - them is almost identical to the example above which uses code - generation. The main difference is that we use generic instead of - specific readers and writers. -

-

- First we'll serialize our users to a data file on disk. -

- -// Serialize user1 and user2 to disk -File file = new File("users.avro"); -DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema); -DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter); -dataFileWriter.create(schema, file); -dataFileWriter.append(user1); -dataFileWriter.append(user2); -dataFileWriter.close(); - -

- We create a DatumWriter, which converts Java objects into - an in-memory serialized format. Since we are not using code - generation, we create a GenericDatumWriter. It requires - the schema both to determine how to write the - GenericRecords and to verify that all non-nullable fields - are present. -

-

- As in the code generation example, we also create a - DataFileWriter, which writes the serialized records, as - well as the schema, to the file specified in the - dataFileWriter.create call. We write our users to the - file via calls to the dataFileWriter.append method. When - we are done writing, we close the data file. -

-
-
- Deserializing -

- Finally, we'll deserialize the data file we just created. -

- -// Deserialize users from disk -DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema); -DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader); -GenericRecord user = null; -while (dataFileReader.hasNext()) { -// Reuse user object by passing it to next(). This saves us from -// allocating and garbage collecting many objects for files with -// many items. -user = dataFileReader.next(user); -System.out.println(user); - -

This outputs:

- -{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} -{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} - -

- Deserializing is very similar to serializing. We create a - GenericDatumReader, analogous to the - GenericDatumWriter we used in serialization, which - converts in-memory serialized items into GenericRecords. - We pass the DatumReader and the previously created - File to a DataFileReader, analogous to the - DataFileWriter, which reads both the schema used by the - writer as well as the data from the file on disk. The data will be - read using the writer's schema included in the file, and the reader's - schema provided to the GenericDatumReader. The writer's - schema is needed to know the order in which fields were written, - while the reader's schema is needed to know what fields are expected - and how to fill in default values for fields added since the file - was written. If there are differences between the two schemas, they - are resolved according to the - Schema Resolution - specification. -

-

- Next, we use the DataFileReader to iterate through the - serialized users and print the deserialized object to stdout. Note - how we perform the iteration: we create a single - GenericRecord object which we store the current - deserialized user in, and pass this record object to every call of - dataFileReader.next. This is a performance optimization - that allows the DataFileReader to reuse the same record - object rather than allocating a new GenericRecord for - every iteration, which can be very expensive in terms of object - allocation and garbage collection if we deserialize a large data file. - While this technique is the standard way to iterate through a data - file, it's also possible to use for (GenericRecord user : - dataFileReader) if performance is not a concern. -

-
-
- Compiling and running the example code -

- This example code is included as a Maven project in the - examples/java-example directory in the Avro docs. From this - directory, execute the following commands to build and run the - example: -

- -$ mvn compile -$ mvn -q exec:java -Dexec.mainClass=example.GenericMain - -
-
- - diff --git a/doc/src/content/xdocs/gettingstartedpython.xml b/doc/src/content/xdocs/gettingstartedpython.xml deleted file mode 100644 index f6216b116d8..00000000000 --- a/doc/src/content/xdocs/gettingstartedpython.xml +++ /dev/null @@ -1,258 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; Getting Started (Python) -
- -

- This is a short guide for getting started with Apache Avro™ using - Python. This guide only covers using Avro for data serialization; see - Patrick Hunt's Avro - RPC Quick Start for a good introduction to using Avro for RPC. -

- -
- Notice for Python 3 users -

- A package called "avro-python3" had been provided to support - Python 3 previously, but the codebase was consolidated into - the "avro" package that supports Python 3 now. - - The avro-python3 package will be removed in the near future, - so users should use the "avro" package instead. - They are mostly API compatible, but there's a few minor difference - (e.g., function name capitalization, - such as avro.schema.Parse vs avro.schema.parse). -

-
- -
- Download and Install -

- The easiest way to get started in Python is to install avro from PyPI - using pip, the Python Package Installer. -

- -$ python3 -m pip install avro - -

Consider doing a local install or using a virtualenv to avoid permissions problems and interfering with system packages:

- -$ python3 -m pip install --user install avro - -

or

- - $ python3 -m venv avro-venv - $ avro-venv/bin/pip install avro - -

- The official releases of the Avro implementations for C, C++, C#, Java, - PHP, Python, and Ruby can be downloaded from the Apache Avro™ - Releases page. This guide uses Avro &AvroVersion;, the latest - version at the time of writing. Download and install - avro-&AvroVersion;-py2.py3-none-any.whl or - avro-&AvroVersion;.tar.gz via - python -m pip avro-&AvroVersion;-py2.py3-none-any.whl - or - python -m pip avro-&AvroVersion;.tar.gz. - (As above, consider using a virtualenv or user-local install.) -

-

Check that you can import avro from a Python prompt.

- -$ python3 -c 'import avro; print(avro.__version__)' - -

The above should print &AvroVersion;. It should not raise an ImportError.

-

- Alternatively, you may build the Avro Python library from source. From - your the root Avro directory, run the commands -

- -$ cd lang/py/ -$ python3 -m pip install -e . -$ python3 - -
- -
- Defining a schema -

- Avro schemas are defined using JSON. Schemas are composed of primitive types - (null, boolean, int, - long, float, double, - bytes, and string) and complex types (record, - enum, array, map, - union, and fixed). You can learn more about - Avro schemas and types from the specification, but for now let's start - with a simple schema example, user.avsc: -

- -{"namespace": "example.avro", - "type": "record", - "name": "User", - "fields": [ - {"name": "name", "type": "string"}, - {"name": "favorite_number", "type": ["int", "null"]}, - {"name": "favorite_color", "type": ["string", "null"]} - ] -} - -

- This schema defines a record representing a hypothetical user. (Note - that a schema file can only contain a single schema definition.) At - minimum, a record definition must include its type ("type": - "record"), a name ("name": "User"), and fields, in - this case name, favorite_number, and - favorite_color. We also define a namespace - ("namespace": "example.avro"), which together with the name - attribute defines the "full name" of the schema - (example.avro.User in this case). - -

-

- Fields are defined via an array of objects, each of which defines a name - and type (other attributes are optional, see the record specification for more - details). The type attribute of a field is another schema object, which - can be either a primitive or complex type. For example, the - name field of our User schema is the primitive type - string, whereas the favorite_number and - favorite_color fields are both unions, - represented by JSON arrays. unions are a complex type that - can be any of the types listed in the array; e.g., - favorite_number can either be an int or - null, essentially making it an optional field. -

-
- -
- Serializing and deserializing without code generation -

- Data in Avro is always stored with its corresponding schema, meaning we - can always read a serialized item, regardless of whether we know the - schema ahead of time. This allows us to perform serialization and - deserialization without code generation. Note that the Avro Python - library does not support code generation. -

-

- Try running the following code snippet, which serializes two users to a - data file on disk, and then reads back and deserializes the data file: -

- -import avro.schema -from avro.datafile import DataFileReader, DataFileWriter -from avro.io import DatumReader, DatumWriter - -schema = avro.schema.parse(open("user.avsc", "rb").read()) - -writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) -writer.append({"name": "Alyssa", "favorite_number": 256}) -writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) -writer.close() - -reader = DataFileReader(open("users.avro", "rb"), DatumReader()) -for user in reader: - print user -reader.close() - -

This outputs:

- -{u'favorite_color': None, u'favorite_number': 256, u'name': u'Alyssa'} -{u'favorite_color': u'red', u'favorite_number': 7, u'name': u'Ben'} - -

- Do make sure that you open your files in binary mode (i.e. using the modes - wb or rb respectively). Otherwise you might - generate corrupt files due to - - automatic replacement of newline characters with the - platform-specific representations. -

-

- Let's take a closer look at what's going on here. -

- -schema = avro.schema.parse(open("user.avsc", "rb").read()) - -

- avro.schema.parse takes a string containing a JSON schema - definition as input and outputs a avro.schema.Schema object - (specifically a subclass of Schema, in this case - RecordSchema). We're passing in the contents of our - user.avsc schema file here. -

- -writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) - -

- We create a DataFileWriter, which we'll use to write - serialized items to a data file on disk. The - DataFileWriter constructor takes three arguments: -

-
    -
  • The file we'll serialize to
  • -
  • A DatumWriter, which is responsible for actually - serializing the items to Avro's binary format - (DatumWriters can be used separately from - DataFileWriters, e.g., to perform IPC with Avro).
  • -
  • The schema we're using. The DataFileWriter needs the - schema both to write the schema to the data file, and to verify that - the items we write are valid items and write the appropriate - fields.
  • -
- -writer.append({"name": "Alyssa", "favorite_number": 256}) -writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) - -

- We use DataFileWriter.append to add items to our data - file. Avro records are represented as Python dicts. - Since the field favorite_color has type ["int", - "null"], we are not required to specify this field, as shown in - the first append. Were we to omit the required name - field, an exception would be raised. Any extra entries not - corresponding to a field are present in the dict are - ignored. -

- -reader = DataFileReader(open("users.avro", "rb"), DatumReader()) - -

- We open the file again, this time for reading back from disk. We use - a DataFileReader and DatumReader analagous - to the DataFileWriter and DatumWriter above. -

- -for user in reader: - print user - -

- The DataFileReader is an iterator that returns - dicts corresponding to the serialized items. -

-
- -
diff --git a/doc/src/content/xdocs/idl.xml b/doc/src/content/xdocs/idl.xml deleted file mode 100644 index 52a607503b3..00000000000 --- a/doc/src/content/xdocs/idl.xml +++ /dev/null @@ -1,486 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; IDL -
- - -
- Introduction - -

This document defines Avro IDL, a higher-level language for authoring Avro schemata. - Before reading this document, you should have familiarity with the concepts of schemata and protocols, - as well as the various primitive and complex types available in Avro. -

-
- -
- Overview -
- Purpose -

The aim of the Avro IDL language is to enable developers to author schemata in a way that - feels more similar to common programming languages like Java, C++, or Python. Additionally, - the Avro IDL language may feel more familiar for those users who have previously used the - interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. -

-
-
- Usage -

- Each Avro IDL file defines a single Avro Protocol, and thus generates as its output a JSON-format - Avro Protocol file with extension .avpr. -

-

- To convert a .avdl file into a .avpr file, it may be processed by the - idl tool. For example: -

- -$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr -$ head /tmp/namespaces.avpr -{ - "protocol" : "TestNamespace", - "namespace" : "avro.test.protocol", - -

- The idl tool can also process input to and from stdin and stdout. - See idl --help for full usage information. -

-

A Maven plugin is also provided to compile .avdl files. To - use it, add something like the following to your pom.xml:

- - - - org.apache.avro - avro-maven-plugin - - - - idl-protocol - - - - - - -]]> -
-
- -
- Defining a Protocol in Avro IDL - -

An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined - by the following code: -

- -protocol MyProtocol { -} - -

- This is equivalent to (and generates) the following JSON protocol definition: -

- - -{ -"protocol" : "MyProtocol", - "types" : [ ], - "messages" : { - } -} - -

- The namespace of the protocol may be changed using the @namespace annotation: -

- -@namespace("mynamespace") -protocol MyProtocol { -} - -

- This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, - as will be described later in this document. -

-

- Protocols in Avro IDL can contain the following items: -

-
    -
  • Imports of external protocol and schema files.
  • -
  • Definitions of named schemata, including records, errors, enums, and fixeds.
  • -
  • Definitions of RPC messages
  • -
-
-
- Imports -

Files may be imported in one of three formats:

-
    -
  • An IDL file may be imported with a statement like: - import idl "foo.avdl"; -
  • -
  • A JSON protocol file may be imported with a statement like: - import protocol "foo.avpr"; -
  • -
  • A JSON schema file may be imported with a statement like: - import schema "foo.avsc"; -
  • -
-

Messages and types in the imported file are added to this - file's protocol.

-

Imported file names are resolved relative to the current IDL file.

-
-
- Defining an Enumeration -

- Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. - In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. - This default is only used when an incompatible symbol is read. It is not used if the enum field is missing. -

-

- Example Writer Enum Definition -

- -enum Shapes { - SQUARE, TRIANGLE, CIRCLE, OVAL -} - -

- Example Reader Enum Definition -

- -enum Shapes { - SQUARE, TRIANGLE, CIRCLE -} = CIRCLE; - -

- In the above example, the reader will use the default value of CIRCLE whenever reading data written with the OVAL symbol of the writer. - Also note that, unlike the JSON format, anonymous enums cannot be defined. -

-
-
- Defining a Fixed Length Field -

- Fixed fields are defined using the following syntax: -

- -fixed MD5(16); - -

This example defines a fixed-length type called MD5 which contains 16 bytes.

-
- -
- Defining Records and Errors -

- Records are defined in Avro IDL using a syntax similar to a struct definition in C: -

- -record Employee { - string name; - boolean active = true; - long salary; -} - -

- The above example defines a record with the name “Employee” with three fields. -

-

- To define an error, simply use the keyword error instead of record. - For example: -

- -error Kaboom { - string explanation; - int result_code = -1; -} - -

- Each field in a record or error consists of a type and a name, - optional property annotations and an optional default value. -

-

A type reference in Avro IDL must be one of:

-
    -
  • A primitive type
  • -
  • A logical type
  • -
  • A named schema defined prior to this usage in the same Protocol
  • -
  • A complex type (array, map, or union)
  • -
- -
- Primitive Types -

The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. - This list includes int, long, string, boolean, - float, double, null, and bytes. -

-
- -
- Logical Types -

Some of the logical types supported by Avro's JSON format are also supported by Avro IDL. - The currently supported types are: -

- -

For example:

- -record Job { - string jobid; - date submitDate; - time_ms submitTime; - timestamp_ms finishTime; - decimal(9,2) finishRatio; -} - -
- -
- References to Named Schemata -

If a named schema has already been defined in the same Avro IDL file, it may be referenced by name - as if it were a primitive type: -

- -record Card { - Suit suit; // refers to the enum Card defined above - int number; -} - -
-
- Default Values - -

Default values for fields may be optionally - specified by using an equals sign after the field name - followed by a JSON expression indicating the default value. - This JSON is interpreted as described in - the spec.

- -
-
- Complex Types - -
- Arrays -

- Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of - any type t is denoted array<t>. For example, an array of strings is - denoted array<string>, and a multidimensional array of Foo records - would be array<array<Foo>>. -

-
- -
- Maps -

Map types are written similarly to array types. An array that contains values of type - t is written map<t>. As in the JSON schema format, all - maps contain string-type keys.

-
- -
- Unions -

Union types are denoted as union { typeA, typeB, typeC, ... }. For example, - this record contains a string field that is optional (unioned with null): -

- -record RecordWithUnion { - union { null, string } optionalString; -} - -

- Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the - JSON format; namely, a record may not contain multiple elements of the same type. -

-
-
-
-
- Defining RPC Messages -

The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for - a method declaration within a C header file or a Java interface. To define an RPC message - add which takes two arguments named foo and bar, - returning an int, simply include the following definition within the protocol: -

- -int add(int foo, int bar = 0); - -

Message arguments, like record fields, may specify default - values.

-

To define a message with no response, you may use the alias void, equivalent - to the Avro null type: -

- -void logMessage(string message); - -

- If you have previously defined an error type within the same protocol, you may declare that - a message can throw this error using the syntax: -

- -void goKaboom() throws Kaboom; - -

To define a one-way message, use the - keyword oneway after the parameter list, for example: -

- -void fireAndForget(string message) oneway; - -
-
- Other Language Features -
- Comments -

All Java-style comments are supported within a Avro IDL file. Any text following - // on a line is ignored, as is any text between /* and - */, possibly spanning multiple lines.

-

Comments that begin with /** are used as the - documentation string for the type or field definition that - follows the comment.

-
-
- Escaping Identifiers -

Occasionally, one will need to use a reserved language keyword as an identifier. In order - to do so, backticks (`) may be used to escape the identifier. For example, to define - a message with the literal name error, you may write: -

- -void `error`(); - -

This syntax is allowed anywhere an identifier is expected.

-
-
- Annotations for Ordering and Namespaces -

Java-style annotations may be used to add additional - properties to types and fields throughout Avro IDL.

- -

For example, to specify the sort order of a field within - a record, one may use the @order annotation - before the field name as follows:

- -record MyRecord { - string @order("ascending") myAscendingSortField; - string @order("descending") myDescendingField; - string @order("ignore") myIgnoredField; -} - -

A field's type may also be preceded by annotations, e.g.:

- -record MyRecord { - @java-class("java.util.ArrayList") array<string> myStrings; -} - - -

This can be used to support java classes that can be - serialized/deserialized via their toString/String constructor, e.g.:

- -record MyRecord { - @java-class("java.math.BigDecimal") string value; - @java-key-class("java.io.File") map<string> fileStates; - array<@java-class("java.math.BigDecimal") string> weights; -} - - -

Similarly, a @namespace annotation may be used to modify the namespace - when defining a named schema. For example: -

- -@namespace("org.apache.avro.firstNamespace") -protocol MyProto { - @namespace("org.apache.avro.someOtherNamespace") - record Foo {} - - record Bar {} -} - -

- will define a protocol in the firstNamespace namespace. The record Foo will be - defined in someOtherNamespace and Bar will be defined in firstNamespace - as it inherits its default from its container. -

-

Type and field aliases are specified with - the @aliases annotation as follows:

- -@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"]) -record MyRecord { - string @aliases(["oldField", "ancientField"]) myNewField; -} - -

Some annotations like those listed above are handled - specially. All other annotations are added as properties to - the protocol, message, schema or field.

-
-
-
- Complete Example -

The following is a complete example of a Avro IDL file that shows most of the above features:

- -/** - * An example protocol in Avro IDL - */ -@namespace("org.apache.avro.test") -protocol Simple { - - @aliases(["org.foo.KindOf"]) - enum Kind { - FOO, - BAR, // the bar enum value - BAZ - } - - fixed MD5(16); - - record TestRecord { - @order("ignore") - string name; - - @order("descending") - Kind kind; - - MD5 hash; - - union { MD5, null} @aliases(["hash"]) nullableHash; - - array<long> arrayOfLongs; - } - - error TestError { - string message; - } - - string hello(string greeting); - TestRecord echo(TestRecord `record`); - int add(int arg1, int arg2); - bytes echoBytes(bytes data); - void `error`() throws TestError; - void ping() oneway; -} - -

Additional examples may be found in the Avro source tree under the src/test/idl/input directory.

-
- -

Apache Avro, Avro, Apache, and the Avro and Apache logos are - trademarks of The Apache Software Foundation.

- - -
diff --git a/doc/src/content/xdocs/index.xml b/doc/src/content/xdocs/index.xml deleted file mode 100644 index 4247e212ec3..00000000000 --- a/doc/src/content/xdocs/index.xml +++ /dev/null @@ -1,96 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; Documentation -
- -
- Introduction -

Apache Avro™ is a data serialization system.

-

Avro provides:

-
    -
  • Rich data structures.
  • -
  • A compact, fast, binary data format.
  • -
  • A container file, to store persistent data.
  • -
  • Remote procedure call (RPC).
  • -
  • Simple integration with dynamic languages. Code - generation is not required to read or write data files nor - to use or implement RPC protocols. Code generation as an - optional optimization, only worth implementing for - statically typed languages.
  • -
-
-
- Schemas -

Avro relies on schemas. When Avro data is read, the - schema used when writing it is always present. This permits - each datum to be written with no per-value overheads, making - serialization both fast and small. This also facilitates use - with dynamic, scripting languages, since data, together with - its schema, is fully self-describing.

-

When Avro data is stored in a file, its schema is stored with - it, so that files may be processed later by any program. If - the program reading the data expects a different schema this - can be easily resolved, since both schemas are present.

-

When Avro is used in RPC, the client and server exchange - schemas in the connection handshake. (This can be optimized - so that, for most calls, no schemas are actually transmitted.) - Since both client and server both have the other's full - schema, correspondence between same named fields, missing - fields, extra fields, etc. can all be easily resolved.

-

Avro schemas are defined with - JSON . This - facilitates implementation in languages that already have - JSON libraries.

-
-
- Comparison with other systems -

Avro provides functionality similar to systems such - as Thrift, - Protocol - Buffers, etc. Avro differs from these systems in the - following fundamental aspects.

-
    -
  • Dynamic typing: Avro does not require that code - be generated. Data is always accompanied by a schema that - permits full processing of that data without code - generation, static datatypes, etc. This facilitates - construction of generic data-processing systems and - languages.
  • -
  • Untagged data: Since the schema is present when - data is read, considerably less type information need be - encoded with data, resulting in smaller serialization size.
  • -
  • No manually-assigned field IDs: When a schema - changes, both the old and new schema are always present when - processing data, so differences may be resolved - symbolically, using field names.
  • -
-
- -

Apache Avro, Avro, Apache, and the Avro and Apache logos are - trademarks of The Apache Software Foundation.

- - -
diff --git a/doc/src/content/xdocs/mr.xml b/doc/src/content/xdocs/mr.xml deleted file mode 100644 index f5a70b95a58..00000000000 --- a/doc/src/content/xdocs/mr.xml +++ /dev/null @@ -1,580 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; Hadoop MapReduce guide -
- -

- Avro provides a convenient way to represent complex data structures within - a Hadoop MapReduce job. Avro data can be used as both input to and output - from a MapReduce job, as well as the intermediate format. The example in - this guide uses Avro data for all three, but it's possible to mix and - match; for instance, MapReduce can be used to aggregate a particular field - in an Avro record. -

-

- This guide assumes basic familiarity with both Hadoop MapReduce and Avro. - See the Hadoop - documentation and the Avro getting - started guide for introductions to these projects. This guide uses - the old MapReduce API (org.apache.hadoop.mapred) and the new - MapReduce API (org.apache.hadoop.mapreduce). -

-
- Setup -

- The code from this guide is included in the Avro docs under - examples/mr-example. The example is set up as a Maven project - that includes the necessary Avro and MapReduce dependencies and the Avro - Maven plugin for code generation, so no external jars are needed to run - the example. In particular, the POM includes the following dependencies: -

- -<dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - <version>&AvroVersion;</version> -</dependency> -<dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-mapred</artifactId> - <version>&AvroVersion;</version> -</dependency> -<dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <version>3.1.2</version> -</dependency> - -

- And the following plugin: -

- -<plugin> - <groupId>org.apache.avro</groupId> - <artifactId>avro-maven-plugin</artifactId> - <version>&AvroVersion;</version> - <executions> - <execution> - <phase>generate-sources</phase> - <goals> - <goal>schema</goal> - </goals> - <configuration> - <sourceDirectory>${project.basedir}/../</sourceDirectory> - <outputDirectory>${project.basedir}/target/generated-sources/</outputDirectory> - </configuration> - </execution> - </executions> -</plugin> - -

- If you do not configure the sourceDirectory and outputDirectory - properties, the defaults will be used. The sourceDirectory property - defaults to src/main/avro. The outputDirectory property - defaults to target/generated-sources. You can change the paths to - match your project layout. -

-

- Alternatively, Avro jars can be downloaded directly from the Apache Avro™ - Releases page. The relevant Avro jars for this guide are - avro-&AvroVersion;.jar and - avro-mapred-&AvroVersion;.jar, as well as - avro-tools-&AvroVersion;.jar for code generation and viewing - Avro data files as JSON. In addition, you will need to install Hadoop - in order to use MapReduce. -

-
- -
- Example: ColorCount -

- Below is a simple example of a MapReduce that uses Avro. There is an example - for both the old (org.apache.hadoop.mapred) and new - (org.apache.hadoop.mapreduce) APIs under - examples/mr-example/src/main/java/example/. MapredColorCount - is the example for the older mapred API while MapReduceColorCount is - the example for the newer mapreduce API. Both examples are below, but - we will detail the mapred API in our subsequent examples. -

- -

MapredColorCount:

- -package example; - -import java.io.IOException; - -import org.apache.avro.*; -import org.apache.avro.Schema.Type; -import org.apache.avro.mapred.*; -import org.apache.hadoop.conf.*; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.*; -import org.apache.hadoop.util.*; - -import example.avro.User; - -public class MapredColorCount extends Configured implements Tool { - - public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> { - @Override - public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter) - throws IOException { - CharSequence color = user.getFavoriteColor(); - // We need this check because the User.favorite_color field has type ["string", "null"] - if (color == null) { - color = "none"; - } - collector.collect(new Pair<CharSequence, Integer>(color, 1)); - } - } - - public static class ColorCountReducer extends AvroReducer<CharSequence, Integer, - Pair<CharSequence, Integer>> { - @Override - public void reduce(CharSequence key, Iterable<Integer> values, - AvroCollector<Pair<CharSequence, Integer>> collector, - Reporter reporter) - throws IOException { - int sum = 0; - for (Integer value : values) { - sum += value; - } - collector.collect(new Pair<CharSequence, Integer>(key, sum)); - } - } - - public int run(String[] args) throws Exception { - if (args.length != 2) { - System.err.println("Usage: MapredColorCount <input path> <output path>"); - return -1; - } - - JobConf conf = new JobConf(getConf(), MapredColorCount.class); - conf.setJobName("colorcount"); - - FileInputFormat.setInputPaths(conf, new Path(args[0])); - FileOutputFormat.setOutputPath(conf, new Path(args[1])); - - AvroJob.setMapperClass(conf, ColorCountMapper.class); - AvroJob.setReducerClass(conf, ColorCountReducer.class); - - // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set - // relevant config options such as input/output format, map output - // classes, and output key class. - AvroJob.setInputSchema(conf, User.getClassSchema()); - AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), - Schema.create(Type.INT))); - - JobClient.runJob(conf); - return 0; - } - - public static void main(String[] args) throws Exception { - int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args); - System.exit(res); - } -} - - -

MapReduceColorCount:

- -package example; - -import java.io.IOException; - -import org.apache.avro.Schema; -import org.apache.avro.mapred.AvroKey; -import org.apache.avro.mapred.AvroValue; -import org.apache.avro.mapreduce.AvroJob; -import org.apache.avro.mapreduce.AvroKeyInputFormat; -import org.apache.avro.mapreduce.AvroKeyValueOutputFormat; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; - -import example.avro.User; - -public class MapReduceColorCount extends Configured implements Tool { - - public static class ColorCountMapper extends - Mapper<AvroKey<User>, NullWritable, Text, IntWritable> { - - @Override - public void map(AvroKey<User> key, NullWritable value, Context context) - throws IOException, InterruptedException { - - CharSequence color = key.datum().getFavoriteColor(); - if (color == null) { - color = "none"; - } - context.write(new Text(color.toString()), new IntWritable(1)); - } - } - - public static class ColorCountReducer extends - Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> { - - @Override - public void reduce(Text key, Iterable<IntWritable> values, - Context context) throws IOException, InterruptedException { - - int sum = 0; - for (IntWritable value : values) { - sum += value.get(); - } - context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum)); - } - } - - public int run(String[] args) throws Exception { - if (args.length != 2) { - System.err.println("Usage: MapReduceColorCount <input path> <output path>"); - return -1; - } - - Job job = new Job(getConf()); - job.setJarByClass(MapReduceColorCount.class); - job.setJobName("Color Count"); - - FileInputFormat.setInputPaths(job, new Path(args[0])); - FileOutputFormat.setOutputPath(job, new Path(args[1])); - - job.setInputFormatClass(AvroKeyInputFormat.class); - job.setMapperClass(ColorCountMapper.class); - AvroJob.setInputKeySchema(job, User.getClassSchema()); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(IntWritable.class); - - job.setOutputFormatClass(AvroKeyValueOutputFormat.class); - job.setReducerClass(ColorCountReducer.class); - AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); - AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); - - return (job.waitForCompletion(true) ? 0 : 1); - } - - public static void main(String[] args) throws Exception { - int res = ToolRunner.run(new MapReduceColorCount(), args); - System.exit(res); - } -} - - -

- ColorCount reads in data files containing User records, - defined in examples/user.avsc, and counts the number of - instances of each favorite color. (This example draws inspiration from - the canonical WordCount MapReduce application.) This example uses the - old MapReduce API. See MapReduceAvroWordCount, found under - doc/examples/mr-example/src/main/java/example/ to see the new MapReduce - API example. The User - schema is defined as follows: -

- -{"namespace": "example.avro", - "type": "record", - "name": "User", - "fields": [ - {"name": "name", "type": "string"}, - {"name": "favorite_number", "type": ["int", "null"]}, - {"name": "favorite_color", "type": ["string", "null"]} - ] -} - -

- This schema is compiled into the User class used by - ColorCount via the Avro Maven plugin (see - examples/mr-example/pom.xml for how this is set up). -

-

- ColorCountMapper essentially takes a User as input and - extracts the User's favorite color, emitting the key-value - pair <favoriteColor, 1>. - ColorCountReducer then adds up how many occurrences of a particular - favorite color were emitted, and outputs the result as a - Pair record. These Pairs are serialized to an - Avro data file. -

-
- Running ColorCount -

- The ColorCount application is provided as a Maven project in the Avro - docs under examples/mr-example. To build the project, - including the code generation of the User schema, run: -

- -mvn compile - -

- Next, run GenerateData from examples/mr-examples to create an Avro data - file, input/users.avro, containing 20 Users with - favorite colors chosen randomly from a list: -

- -mvn exec:java -q -Dexec.mainClass=example.GenerateData - -

- Besides creating the data file, GenerateData prints the JSON - representations of the Users generated to stdout, for example: -

- -{"name": "user", "favorite_number": null, "favorite_color": "red"} -{"name": "user", "favorite_number": null, "favorite_color": "green"} -{"name": "user", "favorite_number": null, "favorite_color": "purple"} -{"name": "user", "favorite_number": null, "favorite_color": null} -... - -

- Now we're ready to run ColorCount. We specify our freshly-generated - input folder as the input path and output as our - output folder (note that MapReduce will not start a job if the output - folder already exists): -

- -mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output" - -

- Once ColorCount completes, checking the contents of the new - output directory should yield the following: -

- -$ ls output/ -part-00000.avro _SUCCESS - -

- You can check the contents of the generated Avro file using the avro-tools jar: -

- -$ java -jar /path/to/avro-tools-&AvroVersion;.jar tojson output/part-00000.avro -{"value": 3, "key": "blue"} -{"value": 7, "key": "green"} -{"value": 1, "key": "none"} -{"value": 2, "key": "orange"} -{"value": 3, "key": "purple"} -{"value": 2, "key": "red"} -{"value": 2, "key": "yellow"} - -
-
-

Now let's go over the ColorCount example in detail.

-
- Mapper - org.apache.hadoop.mapred API -

- The easiest way to use Avro data files as input to a MapReduce job is to - subclass AvroMapper. An AvroMapper defines a - map function that takes an Avro datum as input and outputs a key/value - pair represented as a Pair record. In the ColorCount - example, ColorCountMapper is an AvroMapper - that takes a User as input and outputs a - Pair<CharSequence, Integer>>, where the - CharSequence key is the user's favorite color and the - Integer value is 1. -

- -public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> { - @Override - public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter) - throws IOException { - CharSequence color = user.getFavoriteColor(); - // We need this check because the User.favorite_color field has type ["string", "null"] - if (color == null) { - color = "none"; - } - collector.collect(new Pair<CharSequence, Integer>(color, 1)); - } -} - -

- In order to use our AvroMapper, we must call - AvroJob.setMapperClass and - AvroJob.setInputSchema. -

- -AvroJob.setMapperClass(conf, ColorCountMapper.class); -AvroJob.setInputSchema(conf, User.getClassSchema()); - -

- Note that AvroMapper does not implement the - Mapper interface. Under the hood, the specified Avro data - files are deserialized into AvroWrappers containing the - actual data, which are processed by a Mapper that calls the - configured AvroMapper's map function. - AvroJob.setInputSchema sets up the relevant configuration - parameters needed to make this happen, thus you should not need to call - JobConf.setMapperClass, - JobConf.setInputFormat, - JobConf.setMapOutputKeyClass, - JobConf.setMapOutputValueClass, or - JobConf.setOutputKeyComparatorClass. -

-
-
- Mapper - org.apache.hadoop.mapreduce API -

- This document will not go into all the differences between the mapred and mapreduce APIs, - however will describe the main differences. As you can see, ColorCountMapper is now a - subclass of the Hadoop Mapper class and is passed an AvroKey as it's key. - - Additionally, the AvroJob method calls were slightly changed. -

- - public static class ColorCountMapper extends - Mapper<AvroKey<User>, NullWritable, Text, IntWritable> { - - @Override - public void map(AvroKey<User> key, NullWritable value, Context context) - throws IOException, InterruptedException { - - CharSequence color = key.datum().getFavoriteColor(); - if (color == null) { - color = "none"; - } - context.write(new Text(color.toString()), new IntWritable(1)); - } - } - -
-
- Reducer - org.apache.hadoop.mapred API -

- Analogously to AvroMapper, an AvroReducer - defines a reducer function that takes the key/value types output by an - AvroMapper (or any mapper that outputs Pairs) - and outputs a key/value pair represented a Pair record. In - the ColorCount example, ColorCountReducer is an - AvroReducer that takes the CharSequence key - representing a favorite color and the Iterable<Integer> - representing the counts for that color (they should all be 1 in this - example) and adds up the counts. -

- -public static class ColorCountReducer extends AvroReducer<CharSequence, Integer, - Pair<CharSequence, Integer>> { - @Override - public void reduce(CharSequence key, Iterable<Integer> values, - AvroCollector<Pair<CharSequence, Integer>> collector, - Reporter reporter) - throws IOException { - int sum = 0; - for (Integer value : values) { - sum += value; - } - collector.collect(new Pair<CharSequence, Integer>(key, sum)); - } -} - -

- In order to use our AvroReducer, we must call - AvroJob.setReducerClass and - AvroJob.setOutputSchema. -

- -AvroJob.setReducerClass(conf, ColorCountReducer.class); -AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), - Schema.create(Type.INT))); - -

- Note that AvroReducer does not implement the - Reducer interface. The intermediate Pairs - output by the mapper are split into AvroKeys and - AvroValues, which are processed by a Reducer - that calls the configured AvroReducer's reduce function. - AvroJob.setOutputSchema sets up the relevant configuration - parameters needed to make this happen, thus you should not need to call - JobConf.setReducerClass, - JobConf.setOutputFormat, - JobConf.setOutputKeyClass, - JobConf.setMapOutputKeyClass, - JobConf.setMapOutputValueClass, or - JobConf.setOutputKeyComparatorClass. -

-
-
- Reduce - org.apache.hadoop.mapreduce API -

- As before we not detail every difference between the APIs. As with the Mapper - change ColorCountReducer is now a subclass of Reducer and AvroKey and AvroValue - are emitted. - - Additionally, the AvroJob method calls were slightly changed. -

- - public static class ColorCountReducer extends - Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> { - - @Override - public void reduce(Text key, Iterable<IntWritable> values, - Context context) throws IOException, InterruptedException { - - int sum = 0; - for (IntWritable value : values) { - sum += value.get(); - } - context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum)); - } - } - -
-
- Learning more -

- The mapred API allows users to mix Avro AvroMappers and - AvroReducers with non-Avro Mappers and - Reducers and the mapreduce API allows users input Avro - and output non-Avro or vice versa. -

- -

- The mapred package has API - org.apache.avro.mapred documentation as does the - org.apache.avro.mapreduce package. - MapReduce API (org.apache.hadoop.mapreduce). Similarily to the mapreduce package, - it's possible with the mapred API to implement your own Mappers and - Reducers directly using the public classes provided in - these libraries. See the AvroWordCount application, found under - examples/mr-example/src/main/java/example/AvroWordCount.java in - the Avro documentation, for an example of implementing a - Reducer that outputs Avro data using the old MapReduce API. - See the MapReduceAvroWordCount application, found under - examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java in - the Avro documentation, for an example of implementing a - Reducer that outputs Avro data using the new MapReduce API. -

-
- -
diff --git a/doc/src/content/xdocs/sasl.xml b/doc/src/content/xdocs/sasl.xml deleted file mode 100644 index 514cca5aff1..00000000000 --- a/doc/src/content/xdocs/sasl.xml +++ /dev/null @@ -1,152 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; SASL Profile -
- -
- Introduction -

SASL (RFC 2222) - provides a framework for authentication and security of network - protocols. Each protocol that uses SASL is meant to define a - SASL profile. This document provides a SASL profile - for connection-based Avro RPC.

-
- -
- Overview -

SASL negotiation proceeds as a series of message interactions - over a connection between a client and server using a selected - SASL mechanism. The client starts this negotiation by - sending its chosen mechanism name with an initial (possibly - empty) message. Negotiation proceeds with the exchange of - messages until either side indicates success or failure. The - content of the messages is mechanism-specific. If the - negotiation succeeds, then the session can proceed over the - connection, otherwise it must be abandoned.

-

Some mechanisms continue to process session data after - negotiation (e.g., encrypting it), while some specify that - further session data is transmitted unmodifed.

-
- -
- Negotiation -
- Commands -

Avro SASL negotiation uses four one-byte commands.

-
    -
  • 0: START Used in a client's initial message.
  • -
  • 1: CONTINUE Used while negotiation is ongoing.
  • -
  • 2: FAIL Terminates negotiation unsuccessfully.
  • -
  • 3: COMPLETE Terminates negotiation sucessfully.
  • -
- -

The format of a START message is:

- | 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data | - -

The format of a CONTINUE message is:

- | 1 | 4-byte payload length | payload data | - -

The format of a FAIL message is:

- | 2 | 4-byte message length | UTF-8 message | - -

The format of a COMPLETE message is:

- | 3 | 4-byte payload length | payload data | -
- -
- Process -

Negotiation is initiated by a client sending a START command - containing the client's chosen mechanism name and any - mechanism-specific payload data.

- -

The server and client then interchange some number - (possibly zero) of CONTINUE messages. Each message contains - payload data that is processed by the security mechanism to - generate the next message.

- -

Once either the client or server send a FAIL message then - negotiation has failed. UTF-8-encoded text is included in - the failure message. Once either a FAIL message has been - sent or received, or any other error occurs in the - negotiation, further communication on this connection must - cease.

- -

Once either the client or server send a COMPLETE message - then negotiation has completed successfully. Session data - may now be transmitted over the connection until it is - closed by either side.

-
- -
- -
- Session Data -

If no SASL QOP (quality of protection) is negotiated, then - all subsequent writes to/reads over this connection are - written/read unmodified. In particular, messages use - Avro framing, and are - of the form:

- | 4-byte frame length | frame data | ... | 4 zero bytes | -

If a SASL QOP is negotiated, then it must be used by the - connection for all subsequent messages. This is done by - wrapping each non-empty frame written using the security - mechanism and unwrapping each non-empty frame read. The - length written in each non-empty frame is the length of the - wrapped data. Complete frames must be passed to the security - mechanism for unwrapping. Unwrapped data is then passed to - the application as the content of the frame.

-

If at any point processing fails due to wrapping, unwrapping - or framing errors, then all further communication on this - connection must cease.

-
- -
- Anonymous Mechanism -

The SASL anonymous mechanism - (RFC 2245) is - quite simple to implement. In particular, an initial anonymous - request may be prefixed by the following static sequence:

- | 0 | 0009 | ANONYMOUS | 0000 | -

If a server uses the anonymous mechanism, it should check - that the mechanism name in the start message prefixing the first - request received is 'ANONYMOUS', then simply prefix its initial - response with a COMPLETE message of:

- | 3 | 0000 | -

If an anonymous server recieves some other mechanism name, - then it may respond with a FAIL message as simple as:

- | 2 | 0000 | -

Note that the anonymous mechanism need add no additional - round-trip messages between client and server. The START - message can be piggybacked on the initial request and the - COMPLETE or FAIL message can be piggybacked on the initial - response.

-
- -

Apache Avro, Avro, Apache, and the Avro and Apache logos are - trademarks of The Apache Software Foundation.

- - -
diff --git a/doc/src/content/xdocs/site.xml b/doc/src/content/xdocs/site.xml deleted file mode 100644 index d3dcbb9435c..00000000000 --- a/doc/src/content/xdocs/site.xml +++ /dev/null @@ -1,91 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/src/content/xdocs/spec.xml b/doc/src/content/xdocs/spec.xml deleted file mode 100644 index 09a9d353d12..00000000000 --- a/doc/src/content/xdocs/spec.xml +++ /dev/null @@ -1,1624 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; Specification -
- - -
- Introduction - -

This document defines Apache Avro. It is intended to be the - authoritative specification. Implementations of Avro must - adhere to this document. -

- -
- -
- Schema Declaration -

A Schema is represented in JSON by one of:

-
    -
  • A JSON string, naming a defined type.
  • - -
  • A JSON object, of the form: - - {"type": "typeName" ...attributes...} - - where typeName is either a primitive or derived - type name, as defined below. Attributes not defined in this - document are permitted as metadata, but must not affect - the format of serialized data. -
  • -
  • A JSON array, representing a union of embedded types.
  • -
- -
- Primitive Types -

The set of primitive type names is:

-
    -
  • null: no value
  • -
  • boolean: a binary value
  • -
  • int: 32-bit signed integer
  • -
  • long: 64-bit signed integer
  • -
  • float: single precision (32-bit) IEEE 754 floating-point number
  • -
  • double: double precision (64-bit) IEEE 754 floating-point number
  • -
  • bytes: sequence of 8-bit unsigned bytes
  • -
  • string: unicode character sequence
  • -
- -

Primitive types have no specified attributes.

- -

Primitive type names are also defined type names. Thus, for - example, the schema "string" is equivalent to:

- - {"type": "string"} - -
- -
- Complex Types - -

Avro supports six kinds of complex types: records, enums, - arrays, maps, unions and fixed.

- -
- Records - -

Records use the type name "record" and support the following attributes:

-
    -
  • name: a JSON string providing the name - of the record (required).
  • -
  • namespace, a JSON string that qualifies the name;
  • -
  • doc: a JSON string providing documentation to the - user of this schema (optional).
  • -
  • aliases: a JSON array of strings, providing - alternate names for this record (optional).
  • -
  • fields: a JSON array, listing fields (required). - Each field is a JSON object with the following attributes: -
      -
    • name: a JSON string providing the name - of the field (required), and
    • -
    • doc: a JSON string describing this field - for users (optional).
    • -
    • type: a schema, as defined above
    • -
    • default: A default value for this - field, only used when reading instances that lack - the field for schema evolution purposes. The - presence of a default value does not make the - field optional at encoding time. Permitted values - depend on the field's schema type, according to the - table below. Default values for union fields correspond - to the first schema in the union. Default values for bytes - and fixed fields are JSON strings, where Unicode - code points 0-255 are mapped to unsigned 8-bit byte - values 0-255. Avro encodes a field even if its - value is equal to its default. - - - - - - - - - - - - - - -
      field default values
      avro typejson typeexample
      nullnullnull
      booleanbooleantrue
      int,longinteger1
      float,doublenumber1.1
      bytesstring"\u00FF"
      stringstring"foo"
      recordobject{"a": 1}
      enumstring"FOO"
      arrayarray[1]
      mapobject{"a": 1}
      fixedstring"\u00ff"
      -
    • -
    • order: specifies how this field - impacts sort ordering of this record (optional). - Valid values are "ascending" (the default), - "descending", or "ignore". For more details on how - this is used, see the sort - order section below.
    • -
    • aliases: a JSON array of strings, providing - alternate names for this field (optional).
    • -
    -
  • -
- -

For example, a linked-list of 64-bit values may be defined with:

- -{ - "type": "record", - "name": "LongList", - "aliases": ["LinkedLongs"], // old name for this - "fields" : [ - {"name": "value", "type": "long"}, // each element has a long - {"name": "next", "type": ["null", "LongList"]} // optional next element - ] -} - -
- -
- Enums - -

Enums use the type name "enum" and support the following - attributes:

-
    -
  • name: a JSON string providing the name - of the enum (required).
  • -
  • namespace, a JSON string that qualifies the name;
  • -
  • aliases: a JSON array of strings, providing - alternate names for this enum (optional).
  • -
  • doc: a JSON string providing documentation to the - user of this schema (optional).
  • -
  • symbols: a JSON array, listing symbols, - as JSON strings (required). All symbols in an enum must - be unique; duplicates are prohibited. Every symbol must - match the regular expression [A-Za-z_][A-Za-z0-9_]* - (the same requirement as for names).
  • -
  • default: A default value for this - enumeration, used during resolution when the reader - encounters a symbol from the writer that isn't defined - in the reader's schema (optional). The value provided - here must be a JSON string that's a member of - the symbols array. - See documentation on schema resolution for how this gets - used.
  • -
-

For example, playing card suits might be defined with:

- -{ - "type": "enum", - "name": "Suit", - "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"] -} - -
- -
- Arrays -

Arrays use the type name "array" and support - a single attribute:

-
    -
  • items: the schema of the array's items.
  • -
-

For example, an array of strings is declared - with:

- -{ - "type": "array", - "items" : "string", - "default": [] -} - -
- -
- Maps -

Maps use the type name "map" and support - one attribute:

-
    -
  • values: the schema of the map's values.
  • -
-

Map keys are assumed to be strings.

-

For example, a map from string to long is declared - with:

- -{ - "type": "map", - "values" : "long", - "default": {} -} - -
- -
- Unions -

Unions, as mentioned above, are represented using JSON - arrays. For example, ["null", "string"] - declares a schema which may be either a null or string.

-

(Note that when a default - value is specified for a record field whose type is a - union, the type of the default value must match the - first element of the union. Thus, for unions - containing "null", the "null" is usually listed first, since - the default value of such unions is typically null.)

-

Unions may not contain more than one schema with the same - type, except for the named types record, fixed and enum. For - example, unions containing two array types or two map types - are not permitted, but two types with different names are - permitted. (Names permit efficient resolution when reading - and writing unions.)

-

Unions may not immediately contain other unions.

-
- -
- Fixed -

Fixed uses the type name "fixed" and supports - the following attributes:

-
    -
  • name: a string naming this fixed (required).
  • -
  • namespace, a string that qualifies the name;
  • -
  • aliases: a JSON array of strings, providing - alternate names for this enum (optional).
  • -
  • doc: a JSON string providing documentation to the - user of this schema (optional).
  • -
  • size: an integer, specifying the number - of bytes per value (required).
  • -
-

For example, 16-byte quantity may be declared with:

- {"type": "fixed", "size": 16, "name": "md5"} -
- - -
- -
- Names -

Record, enums and fixed are named types. Each has - a fullname that is composed of two parts; - a name and a namespace. Equality of names - is defined on the fullname.

-

The name portion of a fullname, record field names, and - enum symbols must:

-
    -
  • start with [A-Za-z_]
  • -
  • subsequently contain only [A-Za-z0-9_]
  • -
-

A namespace is a dot-separated sequence of such names. - The empty string may also be used as a namespace to indicate the - null namespace. - Equality of names (including field names and enum symbols) - as well as fullnames is case-sensitive.

-

The null namespace may not be used in a dot-separated - sequence of names. So the grammar for a namespace - is:

-

  <empty> | <name>[(<dot><name>)*]

-

In record, enum and fixed definitions, the fullname is - determined in one of the following ways:

-
    -
  • A name and namespace are both specified. For example, - one might use "name": "X", "namespace": - "org.foo" to indicate the - fullname org.foo.X.
  • -
  • A fullname is specified. If the name specified contains - a dot, then it is assumed to be a fullname, and any - namespace also specified is ignored. For example, - use "name": "org.foo.X" to indicate the - fullname org.foo.X.
  • -
  • A name only is specified, i.e., a name that contains no - dots. In this case the namespace is taken from the most - tightly enclosing schema or protocol. For example, - if "name": "X" is specified, and this occurs - within a field of the record definition - of org.foo.Y, then the fullname - is org.foo.X. If there is no enclosing - namespace then the null namespace is used.
  • -
-

References to previously defined names are as in the latter - two cases above: if they contain a dot they are a fullname, if - they do not contain a dot, the namespace is the namespace of - the enclosing definition.

-

Primitive type names have no namespace and their names may - not be defined in any namespace.

-

A schema or protocol may not contain multiple definitions - of a fullname. Further, a name must be defined before it is - used ("before" in the depth-first, left-to-right traversal of - the JSON parse tree, where the types attribute of - a protocol is always deemed to come "before" the - messages attribute.) -

-
- -
- Aliases -

Named types and fields may have aliases. An implementation - may optionally use aliases to map a writer's schema to the - reader's. This faciliates both schema evolution as well as - processing disparate datasets.

-

Aliases function by re-writing the writer's schema using - aliases from the reader's schema. For example, if the - writer's schema was named "Foo" and the reader's schema is - named "Bar" and has an alias of "Foo", then the implementation - would act as though "Foo" were named "Bar" when reading. - Similarly, if data was written as a record with a field named - "x" and is read as a record with a field named "y" with alias - "x", then the implementation would act as though "x" were - named "y" when reading.

-

A type alias may be specified either as a fully - namespace-qualified, or relative to the namespace of the name - it is an alias for. For example, if a type named "a.b" has - aliases of "c" and "x.y", then the fully qualified names of - its aliases are "a.c" and "x.y".

-
- -
- -
- Data Serialization and Deserialization - -

Binary encoded Avro data does not include type information or - field names. The benefit is that the serialized data is small, but - as a result a schema must always be used in order to read Avro data - correctly. The best way to ensure that the schema is structurally - identical to the one used to write the data is to use the exact same - schema.

- -

Therefore, files or systems that store Avro data should always - include the writer's schema for that data. Avro-based remote procedure - call (RPC) systems must also guarantee that remote recipients of data - have a copy of the schema used to write that data. In general, it is - advisable that any reader of Avro data should use a schema that is - the same (as defined more fully in - Parsing Canonical Form for - Schemas) as the schema that was used to write the data in order to - deserialize it correctly. Deserializing data into a newer schema is - accomplished by specifying an additional schema, the results of which are - described in Schema Resolution.

- -

In general, both serialization and deserialization proceed as a - depth-first, left-to-right traversal of the schema, serializing or - deserializing primitive types as they are encountered. Therefore, it is - possible, though not advisable, to read Avro data with a schema that - does not have the same Parsing Canonical Form as the schema with which - the data was written. In order for this to work, the serialized primitive - values must be compatible, in order value by value, with the items in the - deserialization schema. For example, int and long are always serialized - the same way, so an int could be deserialized as a long. Since the - compatibility of two schemas depends on both the data and the - serialization format (eg. binary is more permissive than JSON because JSON - includes field names, eg. a long that is too large will overflow an int), - it is simpler and more reliable to use schemas with identical Parsing - Canonical Form.

- -
- Encodings -

Avro specifies two serialization encodings: binary and - JSON. Most applications will use the binary encoding, as it - is smaller and faster. But, for debugging and web-based - applications, the JSON encoding may sometimes be - appropriate.

-
- -
- Binary Encoding -

Binary encoding does not include field names, self-contained - information about the types of individual bytes, nor field or - record separators. Therefore readers are wholly reliant on - the schema used when the data was encoded.

- -
- Primitive Types -

Primitive types are encoded in binary as follows:

-
    -
  • null is written as zero bytes.
  • -
  • a boolean is written as a single byte whose - value is either 0 (false) or 1 - (true).
  • -
  • int and long values are written - using variable-length - zig-zag coding. Some examples: - - - - - - - - - - - -
    valuehex
    000
    -101
    102
    -203
    204
    ...
    -647f
    64 80 01
    ...
    -
  • -
  • a float is written as 4 bytes. The float is - converted into a 32-bit integer using a method equivalent - to Java's floatToIntBits and then encoded - in little-endian format.
  • -
  • a double is written as 8 bytes. The double - is converted into a 64-bit integer using a method equivalent - to Java's - doubleToLongBits and then encoded in little-endian - format.
  • -
  • bytes are encoded as - a long followed by that many bytes of data. -
  • -
  • a string is encoded as - a long followed by that many bytes of UTF-8 - encoded character data. -

    For example, the three-character string "foo" would - be encoded as the long value 3 (encoded as - hex 06) followed by the UTF-8 encoding of - 'f', 'o', and 'o' (the hex bytes 66 6f - 6f): -

    - 06 66 6f 6f -
  • -
- -
- - -
- Complex Types -

Complex types are encoded in binary as follows:

- -
- Records -

A record is encoded by encoding the values of its - fields in the order that they are declared. In other - words, a record is encoded as just the concatenation of - the encodings of its fields. Field values are encoded per - their schema.

-

For example, the record schema

- - { - "type": "record", - "name": "test", - "fields" : [ - {"name": "a", "type": "long"}, - {"name": "b", "type": "string"} - ] - } - -

An instance of this record whose a field has - value 27 (encoded as hex 36) and - whose b field has value "foo" (encoded as hex - bytes 06 66 6f 6f), would be encoded simply - as the concatenation of these, namely the hex byte - sequence:

- 36 06 66 6f 6f -
- -
- Enums -

An enum is encoded by a int, representing - the zero-based position of the symbol in the schema.

-

For example, consider the enum:

- - {"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] } - -

This would be encoded by an int between - zero and three, with zero indicating "A", and 3 indicating - "D".

-
- - -
- Arrays -

Arrays are encoded as a series of blocks. - Each block consists of a long count - value, followed by that many array items. A block with - count zero indicates the end of the array. Each item is - encoded per the array's item schema.

- -

If a block's count is negative, its absolute value is used, - and the count is followed immediately by a long - block size indicating the number of bytes in the - block. This block size permits fast skipping through data, - e.g., when projecting a record to a subset of its fields.

- -

For example, the array schema

- {"type": "array", "items": "long"} -

an array containing the items 3 and 27 could be encoded - as the long value 2 (encoded as hex 04) followed by long - values 3 and 27 (encoded as hex 06 36) - terminated by zero:

- 04 06 36 00 - -

The blocked representation permits one to read and write - arrays larger than can be buffered in memory, since one can - start writing items without knowing the full length of the - array.

- -
- -
- Maps -

Maps are encoded as a series of blocks. Each - block consists of a long count - value, followed by that many key/value pairs. A block - with count zero indicates the end of the map. Each item - is encoded per the map's value schema.

- -

If a block's count is negative, its absolute value is used, - and the count is followed immediately by a long - block size indicating the number of bytes in the - block. This block size permits fast skipping through data, - e.g., when projecting a record to a subset of its fields.

- -

The blocked representation permits one to read and write - maps larger than can be buffered in memory, since one can - start writing items without knowing the full length of the - map.

- -
- -
- Unions -

A union is encoded by first writing an int - value indicating the zero-based position within the - union of the schema of its value. The value is then - encoded per the indicated schema within the union.

-

For example, the union - schema ["null","string"] would encode:

-
    -
  • null as zero (the index of "null" in the union): - 00
  • -
  • the string "a" as one (the index of - "string" in the union, encoded as hex 02), - followed by the serialized string: - 02 02 61
  • -
-

NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. - In reality, we don't expect unions with 215M members

-
- -
- Fixed -

Fixed instances are encoded using the number of bytes - declared in the schema.

-
- -
- -
- -
- JSON Encoding - -

Except for unions, the JSON encoding is the same as is used - to encode field default - values.

- -

The value of a union is encoded in JSON as follows:

- -
    -
  • if its type is null, then it is encoded as - a JSON null;
  • -
  • otherwise it is encoded as a JSON object with one - name/value pair whose name is the type's name and whose - value is the recursively encoded value. For Avro's named - types (record, fixed or enum) the user-specified name is - used, for other types the type name is used.
  • -
- -

For example, the union - schema ["null","string","Foo"], where Foo is a - record name, would encode:

-
    -
  • null as null;
  • -
  • the string "a" as - {"string": "a"}; and
  • -
  • a Foo instance as {"Foo": {...}}, - where {...} indicates the JSON encoding of a - Foo instance.
  • -
- -

Note that the original schema is still required to correctly - process JSON-encoded data. For example, the JSON encoding does not - distinguish between int - and long, float - and double, records and maps, enums and strings, - etc.

- -
- -
- Single-object encoding - -

In some situations a single Avro serialized object is to be stored for a - longer period of time. One very common example is storing Avro records - for several weeks in an Apache Kafka topic.

-

In the period after a schema change this persistence system will contain records - that have been written with different schemas. So the need arises to know which schema - was used to write a record to support schema evolution correctly. - In most cases the schema itself is too large to include in the message, - so this binary wrapper format supports the use case more effectively.

- -
- Single object encoding specification -

Single Avro objects are encoded as follows:

-
    -
  1. A two-byte marker, C3 01, to show that the message is Avro and uses this single-record format (version 1).
  2. -
  3. The 8-byte little-endian CRC-64-AVRO fingerprint of the object's schema
  4. -
  5. The Avro object encoded using Avro's binary encoding
  6. -
-
- -

Implementations use the 2-byte marker to determine whether a payload is Avro. - This check helps avoid expensive lookups that resolve the schema from a - fingerprint, when the message is not an encoded Avro payload.

- -
- -
- -
- Sort Order - -

Avro defines a standard sort order for data. This permits - data written by one system to be efficiently sorted by another - system. This can be an important optimization, as sort order - comparisons are sometimes the most frequent per-object - operation. Note also that Avro binary-encoded data can be - efficiently ordered without deserializing it to objects.

- -

Data items may only be compared if they have identical - schemas. Pairwise comparisons are implemented recursively - with a depth-first, left-to-right traversal of the schema. - The first mismatch encountered determines the order of the - items.

- -

Two items with the same schema are compared according to the - following rules.

-
    -
  • null data is always equal.
  • -
  • boolean data is ordered with false before true.
  • -
  • int, long, float - and double data is ordered by ascending numeric - value.
  • -
  • bytes and fixed data are - compared lexicographically by unsigned 8-bit values.
  • -
  • string data is compared lexicographically by - Unicode code point. Note that since UTF-8 is used as the - binary encoding for strings, sorting of bytes and string - binary data is identical.
  • -
  • array data is compared lexicographically by - element.
  • -
  • enum data is ordered by the symbol's position - in the enum schema. For example, an enum whose symbols are - ["z", "a"] would sort "z" values - before "a" values.
  • -
  • union data is first ordered by the branch - within the union, and, within that, by the type of the - branch. For example, an ["int", "string"] - union would order all int values before all string values, - with the ints and strings themselves ordered as defined - above.
  • -
  • record data is ordered lexicographically by - field. If a field specifies that its order is: -
      -
    • "ascending", then the order of its values - is unaltered.
    • -
    • "descending", then the order of its values - is reversed.
    • -
    • "ignore", then its values are ignored - when sorting.
    • -
    -
  • -
  • map data may not be compared. It is an error - to attempt to compare data containing maps unless those maps - are in an "order":"ignore" record field. -
  • -
-
- -
- Object Container Files -

Avro includes a simple object container file format. A file - has a schema, and all objects stored in the file must be written - according to that schema, using binary encoding. Objects are - stored in blocks that may be compressed. Syncronization markers - are used between blocks to permit efficient splitting of files - for MapReduce processing.

- -

Files may include arbitrary user-specified metadata.

- -

A file consists of:

-
    -
  • A file header, followed by
  • -
  • one or more file data blocks.
  • -
- -

A file header consists of:

-
    -
  • Four bytes, ASCII 'O', 'b', 'j', followed by 1.
  • -
  • file metadata, including the schema.
  • -
  • The 16-byte, randomly-generated sync marker for this file.
  • -
- -

File metadata is written as if defined by the following map schema:

- {"type": "map", "values": "bytes"} - -

All metadata properties that start with "avro." are reserved. - The following file metadata properties are currently used:

-
    -
  • avro.schema contains the schema of objects - stored in the file, as JSON data (required).
  • -
  • avro.codec the name of the compression codec - used to compress blocks, as a string. Implementations - are required to support the following codecs: "null" and "deflate". - If codec is absent, it is assumed to be "null". The codecs - are described with more detail below.
  • -
- -

A file header is thus described by the following schema:

- -{"type": "record", "name": "org.apache.avro.file.Header", - "fields" : [ - {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, - {"name": "meta", "type": {"type": "map", "values": "bytes"}}, - {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}}, - ] -} - - -

A file data block consists of:

-
    -
  • A long indicating the count of objects in this block.
  • -
  • A long indicating the size in bytes of the serialized objects - in the current block, after any codec is applied
  • -
  • The serialized objects. If a codec is specified, this is - compressed by that codec.
  • -
  • The file's 16-byte sync marker.
  • -
-

Thus, each block's binary data can be efficiently extracted or skipped without - deserializing the contents. The combination of block size, object counts, and - sync markers enable detection of corrupt blocks and help ensure data integrity.

-
- Required Codecs -
- null -

The "null" codec simply passes through data uncompressed.

-
- -
- deflate -

The "deflate" codec writes the data block using the - deflate algorithm as specified in - RFC 1951, - and typically implemented using the zlib library. Note that this - format (unlike the "zlib format" in RFC 1950) does not have a - checksum. -

-
-
-
- Optional Codecs -
- bzip2 -

The "bzip2" codec uses the bzip2 - compression library.

-
- -
- snappy -

The "snappy" codec uses - Google's Snappy - compression library. Each compressed block is followed - by the 4-byte, big-endian CRC32 checksum of the - uncompressed data in the block.

-
- -
- xz -

The "xz" codec uses the XZ - compression library.

-
- -
- zstandard -

The "zstandard" codec uses - Facebook's Zstandard - compression library.

-
-
-
- -
- Protocol Declaration -

Avro protocols describe RPC interfaces. Like schemas, they are - defined with JSON text.

- -

A protocol is a JSON object with the following attributes:

-
    -
  • protocol, a string, the name of the protocol - (required);
  • -
  • namespace, an optional string that qualifies the name;
  • -
  • doc, an optional string describing this protocol;
  • -
  • types, an optional list of definitions of named types - (records, enums, fixed and errors). An error definition is - just like a record definition except it uses "error" instead - of "record". Note that forward references to named types - are not permitted.
  • -
  • messages, an optional JSON object whose keys are - message names and whose values are objects whose attributes - are described below. No two messages may have the same - name.
  • -
-

The name and namespace qualification rules defined for schema objects - apply to protocols as well.

- -
- Messages -

A message has attributes:

-
    -
  • a doc, an optional description of the message,
  • -
  • a request, a list of named, - typed parameter schemas (this has the same form - as the fields of a record declaration);
  • -
  • a response schema;
  • -
  • an optional union of declared error schemas. - The effective union has "string" - prepended to the declared union, to permit transmission of - undeclared "system" errors. For example, if the declared - error union is ["AccessError"], then the - effective union is ["string", "AccessError"]. - When no errors are declared, the effective error union - is ["string"]. Errors are serialized using - the effective union; however, a protocol's JSON - declaration contains only the declared union. -
  • -
  • an optional one-way boolean parameter.
  • -
-

A request parameter list is processed equivalently to an - anonymous record. Since record field lists may vary between - reader and writer, request parameters may also differ - between the caller and responder, and such differences are - resolved in the same manner as record field differences.

-

The one-way parameter may only be true when the response type - is "null" and no errors are listed.

-
-
- Sample Protocol -

For example, one may define a simple HelloWorld protocol with:

- -{ - "namespace": "com.acme", - "protocol": "HelloWorld", - "doc": "Protocol Greetings", - - "types": [ - {"name": "Greeting", "type": "record", "fields": [ - {"name": "message", "type": "string"}]}, - {"name": "Curse", "type": "error", "fields": [ - {"name": "message", "type": "string"}]} - ], - - "messages": { - "hello": { - "doc": "Say hello.", - "request": [{"name": "greeting", "type": "Greeting" }], - "response": "Greeting", - "errors": ["Curse"] - } - } -} - -
-
- -
- Protocol Wire Format - -
- Message Transport -

Messages may be transmitted via - different transport mechanisms.

- -

To the transport, a message is an opaque byte sequence.

- -

A transport is a system that supports:

-
    -
  • transmission of request messages -
  • -
  • receipt of corresponding response messages -

    Servers may send a response message back to the client - corresponding to a request message. The mechanism of - correspondance is transport-specific. For example, in - HTTP it is implicit, since HTTP directly supports requests - and responses. But a transport that multiplexes many - client threads over a single socket would need to tag - messages with unique identifiers.

    -
  • -
- -

Transports may be either stateless - or stateful. In a stateless transport, messaging - assumes no established connection state, while stateful - transports establish connections that may be used for multiple - messages. This distinction is discussed further in - the handshake section below.

- -
- HTTP as Transport -

When - HTTP - is used as a transport, each Avro message exchange is an - HTTP request/response pair. All messages of an Avro - protocol should share a single URL at an HTTP server. - Other protocols may also use that URL. Both normal and - error Avro response messages should use the 200 (OK) - response code. The chunked encoding may be used for - requests and responses, but, regardless the Avro request - and response are the entire content of an HTTP request and - response. The HTTP Content-Type of requests and responses - should be specified as "avro/binary". Requests should be - made using the POST method.

-

HTTP is used by Avro as a stateless transport.

-
-
- -
- Message Framing -

Avro messages are framed as a list of buffers.

-

Framing is a layer between messages and the transport. - It exists to optimize certain operations.

- -

The format of framed message data is:

-
    -
  • a series of buffers, where each buffer consists of: -
      -
    • a four-byte, big-endian buffer length, followed by
    • -
    • that many bytes of buffer data.
    • -
    -
  • -
  • A message is always terminated by a zero-length buffer.
  • -
- -

Framing is transparent to request and response message - formats (described below). Any message may be presented as a - single or multiple buffers.

- -

Framing can permit readers to more efficiently get - different buffers from different sources and for writers to - more efficiently store different buffers to different - destinations. In particular, it can reduce the number of - times large binary objects are copied. For example, if an RPC - parameter consists of a megabyte of file data, that data can - be copied directly to a socket from a file descriptor, and, on - the other end, it could be written directly to a file - descriptor, never entering user space.

- -

A simple, recommended, framing policy is for writers to - create a new segment whenever a single binary object is - written that is larger than a normal output buffer. Small - objects are then appended in buffers, while larger objects are - written as their own buffers. When a reader then tries to - read a large object the runtime can hand it an entire buffer - directly, without having to copy it.

-
- -
- Handshake - -

The purpose of the handshake is to ensure that the client - and the server have each other's protocol definition, so that - the client can correctly deserialize responses, and the server - can correctly deserialize requests. Both clients and servers - should maintain a cache of recently seen protocols, so that, - in most cases, a handshake will be completed without extra - round-trip network exchanges or the transmission of full - protocol text.

- -

RPC requests and responses may not be processed until a - handshake has been completed. With a stateless transport, all - requests and responses are prefixed by handshakes. With a - stateful transport, handshakes are only attached to requests - and responses until a successful handshake response has been - returned over a connection. After this, request and response - payloads are sent without handshakes for the lifetime of that - connection.

- -

The handshake process uses the following record schemas:

- - -{ - "type": "record", - "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", - "fields": [ - {"name": "clientHash", - "type": {"type": "fixed", "name": "MD5", "size": 16}}, - {"name": "clientProtocol", "type": ["null", "string"]}, - {"name": "serverHash", "type": "MD5"}, - {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} - ] -} -{ - "type": "record", - "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", - "fields": [ - {"name": "match", - "type": {"type": "enum", "name": "HandshakeMatch", - "symbols": ["BOTH", "CLIENT", "NONE"]}}, - {"name": "serverProtocol", - "type": ["null", "string"]}, - {"name": "serverHash", - "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, - {"name": "meta", - "type": ["null", {"type": "map", "values": "bytes"}]} - ] -} - - -
    -
  • A client first prefixes each request with - a HandshakeRequest containing just the hash of - its protocol and of the server's protocol - (clientHash!=null, clientProtocol=null, - serverHash!=null), where the hashes are 128-bit MD5 - hashes of the JSON protocol text. If a client has never - connected to a given server, it sends its hash as a guess of - the server's hash, otherwise it sends the hash that it - previously obtained from this server.
  • - -
  • The server responds with - a HandshakeResponse containing one of: -
      -
    • match=BOTH, serverProtocol=null, - serverHash=null if the client sent the valid hash - of the server's protocol and the server knows what - protocol corresponds to the client's hash. In this case, - the request is complete and the response data - immediately follows the HandshakeResponse.
    • - -
    • match=CLIENT, serverProtocol!=null, - serverHash!=null if the server has previously - seen the client's protocol, but the client sent an - incorrect hash of the server's protocol. The request is - complete and the response data immediately follows the - HandshakeResponse. The client must use the returned - protocol to process the response and should also cache - that protocol and its hash for future interactions with - this server.
    • - -
    • match=NONE if the server has not - previously seen the client's protocol. - The serverHash - and serverProtocol may also be non-null if - the server's protocol hash was incorrect. - -

      In this case the client must then re-submit its request - with its protocol text (clientHash!=null, - clientProtocol!=null, serverHash!=null) and the - server should respond with a successful match - (match=BOTH, serverProtocol=null, - serverHash=null) as above.

      -
    • -
    -
  • -
- -

The meta field is reserved for future - handshake enhancements.

- -
- -
- Call Format -

A call consists of a request message paired with - its resulting response or error message. Requests and - responses contain extensible metadata, and both kinds of - messages are framed as described above.

- -

The format of a call request is:

-
    -
  • request metadata, a map with values of - type bytes
  • -
  • the message name, an Avro string, - followed by
  • -
  • the message parameters. Parameters are - serialized according to the message's request - declaration.
  • -
- -

When the empty string is used as a message name a server - should ignore the parameters and return an empty response. A - client may use this to ping a server or to perform a handshake - without sending a protocol message.

- -

When a message is declared one-way and a stateful - connection has been established by a successful handshake - response, no response data is sent. Otherwise the format of - the call response is:

-
    -
  • response metadata, a map with values of - type bytes
  • -
  • a one-byte error flag boolean, followed by either: -
      -
    • if the error flag is false, the message response, - serialized per the message's response schema.
    • -
    • if the error flag is true, the error, - serialized per the message's effective error union - schema.
    • -
    -
  • -
-
- -
- -
- Schema Resolution - -

A reader of Avro data, whether from an RPC or a file, can - always parse that data because the original schema must be - provided along with the data. However, the reader may be - programmed to read data into a different schema. - For example, if the data was written with a different version - of the software than it is read, then fields may have been - added or removed from records. This section specifies how such - schema differences should be resolved.

- -

We refer to the schema used to write the data as - the writer's schema, and the schema that the - application expects the reader's schema. Differences - between these should be resolved as follows:

- -
    -
  • It is an error if the two schemas do not match.

    -

    To match, one of the following must hold:

    -
      -
    • both schemas are arrays whose item types match
    • -
    • both schemas are maps whose value types match
    • -
    • both schemas are enums whose (unqualified) names match
    • -
    • both schemas are fixed whose sizes and (unqualified) names match
    • -
    • both schemas are records with the same (unqualified) name
    • -
    • either schema is a union
    • -
    • both schemas have same primitive type
    • -
    • the writer's schema may be promoted to the - reader's as follows: -
        -
      • int is promotable to long, float, or double
      • -
      • long is promotable to float or double
      • -
      • float is promotable to double
      • -
      • string is promotable to bytes
      • -
      • bytes is promotable to string
      • -
      -
    • -
    -
  • - -
  • if both are records: -
      -
    • the ordering of fields may be different: fields are - matched by name.
    • - -
    • schemas for fields with the same name in both records - are resolved recursively.
    • - -
    • if the writer's record contains a field with a name - not present in the reader's record, the writer's value - for that field is ignored.
    • - -
    • if the reader's record schema has a field that - contains a default value, and writer's schema does not - have a field with the same name, then the reader should - use the default value from its field.
    • - -
    • if the reader's record schema has a field with no - default value, and writer's schema does not have a field - with the same name, an error is signalled.
    • -
    -
  • - -
  • if both are enums: -

    if the writer's symbol is not present in the reader's - enum and the reader has a default value, then - that value is used, otherwise an error is signalled.

    -
  • - -
  • if both are arrays: -

    This resolution algorithm is applied recursively to the reader's and - writer's array item schemas.

    -
  • - -
  • if both are maps: -

    This resolution algorithm is applied recursively to the reader's and - writer's value schemas.

    -
  • - -
  • if both are unions: -

    The first schema in the reader's union that matches the - selected writer's union schema is recursively resolved - against it. if none match, an error is signalled.

    -
  • - -
  • if reader's is a union, but writer's is not -

    The first schema in the reader's union that matches the - writer's schema is recursively resolved against it. If none - match, an error is signalled.

    -
  • - -
  • if writer's is a union, but reader's is not -

    If the reader's schema matches the selected writer's schema, - it is recursively resolved against it. If they do not - match, an error is signalled.

    -
  • - -
- -

A schema's "doc" fields are ignored for the purposes of schema resolution. Hence, - the "doc" portion of a schema may be dropped at serialization.

- -
- -
- Parsing Canonical Form for Schemas - -

One of the defining characteristics of Avro is that a reader - must use the schema used by the writer of the data in - order to know how to read the data. This assumption results in a data - format that's compact and also amenable to many forms of schema - evolution. However, the specification so far has not defined - what it means for the reader to have the "same" schema as the - writer. Does the schema need to be textually identical? Well, - clearly adding or removing some whitespace to a JSON expression - does not change its meaning. At the same time, reordering the - fields of records clearly does change the meaning. So - what does it mean for a reader to have "the same" schema as a - writer?

- -

Parsing Canonical Form is a transformation of a - writer's schema that let's us define what it means for two - schemas to be "the same" for the purpose of reading data written - against the schema. It is called Parsing Canonical Form - because the transformations strip away parts of the schema, like - "doc" attributes, that are irrelevant to readers trying to parse - incoming data. It is called Canonical Form because the - transformations normalize the JSON text (such as the order of - attributes) in a way that eliminates unimportant differences - between schemas. If the Parsing Canonical Forms of two - different schemas are textually equal, then those schemas are - "the same" as far as any reader is concerned, i.e., there is no - serialized data that would allow a reader to distinguish data - generated by a writer using one of the original schemas from - data generated by a writing using the other original schema. - (We sketch a proof of this property in a companion - document.)

- -

The next subsection specifies the transformations that define - Parsing Canonical Form. But with a well-defined canonical form, - it can be convenient to go one step further, transforming these - canonical forms into simple integers ("fingerprints") that can - be used to uniquely identify schemas. The subsection after next - recommends some standard practices for generating such - fingerprints.

- -
- Transforming into Parsing Canonical Form - -

Assuming an input schema (in JSON form) that's already - UTF-8 text for a valid Avro schema (including all - quotes as required by JSON), the following transformations - will produce its Parsing Canonical Form:

-
    -
  • [PRIMITIVES] Convert primitive schemas to their simple - form (e.g., int instead of - {"type":"int"}).
  • - -
  • [FULLNAMES] Replace short names with fullnames, using - applicable namespaces to do so. Then eliminate - namespace attributes, which are now redundant.
  • - -
  • [STRIP] Keep only attributes that are relevant to - parsing data, which are: type, - name, fields, - symbols, items, - values, size. Strip all others - (e.g., doc and aliases).
  • - -
  • [ORDER] Order the appearance of fields of JSON objects - as follows: name, type, - fields, symbols, - items, values, size. - For example, if an object has type, - name, and size fields, then the - name field should appear first, followed by the - type and then the size fields.
  • - -
  • [STRINGS] For all JSON string literals in the schema - text, replace any escaped characters (e.g., \uXXXX escapes) - with their UTF-8 equivalents.
  • - -
  • [INTEGERS] Eliminate quotes around and any leading - zeros in front of JSON integer literals (which appear in the - size attributes of fixed schemas).
  • - -
  • [WHITESPACE] Eliminate all whitespace in JSON outside of string literals.
  • -
-
- -
- Schema Fingerprints - -

"[A] fingerprinting algorithm is a procedure that maps an - arbitrarily large data item (such as a computer file) to a - much shorter bit string, its fingerprint, that - uniquely identifies the original data for all practical - purposes" (quoted from [Wikipedia]). - In the Avro context, fingerprints of Parsing Canonical Form - can be useful in a number of applications; for example, to - cache encoder and decoder objects, to tag data items with a - short substitute for the writer's full schema, and to quickly - negotiate common-case schemas between readers and writers.

- -

In designing fingerprinting algorithms, there is a - fundamental trade-off between the length of the fingerprint - and the probability of collisions. To help application - designers find appropriate points within this trade-off space, - while encouraging interoperability and ease of implementation, - we recommend using one of the following three algorithms when - fingerprinting Avro schemas:

- -
    -
  • When applications can tolerate longer fingerprints, we - recommend using the SHA-256 digest - algorithm to generate 256-bit fingerprints of Parsing - Canonical Forms. Most languages today have SHA-256 - implementations in their libraries.
  • - -
  • At the opposite extreme, the smallest fingerprint we - recommend is a 64-bit Rabin - fingerprint. Below, we provide pseudo-code for this - algorithm that can be easily translated into any programming - language. 64-bit fingerprints should guarantee uniqueness - for schema caches of up to a million entries (for such a - cache, the chance of a collision is 3E-8). We don't - recommend shorter fingerprints, as the chances of collisions - is too great (for example, with 32-bit fingerprints, a cache - with as few as 100,000 schemas has a 50% chance of having a - collision).
  • - -
  • Between these two extremes, we recommend using the MD5 message - digest to generate 128-bit fingerprints. These make - sense only where very large numbers of schemas are being - manipulated (tens of millions); otherwise, 64-bit - fingerprints should be sufficient. As with SHA-256, MD5 - implementations are found in most libraries today.
  • -
- -

These fingerprints are not meant to provide any - security guarantees, even the longer SHA-256-based ones. Most - Avro applications should be surrounded by security measures - that prevent attackers from writing random data and otherwise - interfering with the consumers of schemas. We recommend that - these surrounding mechanisms be used to prevent collision and - pre-image attacks (i.e., "forgery") on schema fingerprints, - rather than relying on the security properties of the - fingerprints themselves.

- -

Rabin fingerprints are cyclic - redundancy checks computed using irreducible polynomials. - In the style of the Appendix of RFC 1952 - (pg 10), which defines the CRC-32 algorithm, here's our - definition of the 64-bit AVRO fingerprinting algorithm:

- - -long fingerprint64(byte[] buf) { - if (FP_TABLE == null) initFPTable(); - long fp = EMPTY; - for (int i = 0; i < buf.length; i++) - fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff]; - return fp; -} - -static long EMPTY = 0xc15d213aa4d7a795L; -static long[] FP_TABLE = null; - -void initFPTable() { - FP_TABLE = new long[256]; - for (int i = 0; i < 256; i++) { - long fp = i; - for (int j = 0; j < 8; j++) - fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L)); - FP_TABLE[i] = fp; - } -} - - -

Readers interested in the mathematics behind this - algorithm may want to read - Chapter 14 of the Second Edition of Hacker's Delight. - (Unlike RFC-1952 and the book chapter, we prepend - a single one bit to messages. We do this because CRCs ignore - leading zero bits, which can be problematic. Our code - prepends a one-bit by initializing fingerprints using - EMPTY, rather than initializing using zero as in - RFC-1952 and the book chapter.)

-
-
- -
- Logical Types - -

A logical type is an Avro primitive or complex type with extra attributes to - represent a derived type. The attribute logicalType must - always be present for a logical type, and is a string with the name of one of - the logical types listed later in this section. Other attributes may be defined - for particular logical types.

- -

A logical type is always serialized using its underlying Avro type so - that values are encoded in exactly the same way as the equivalent Avro - type that does not have a logicalType attribute. Language - implementations may choose to represent logical types with an - appropriate native type, although this is not required.

- -

Language implementations must ignore unknown logical types when - reading, and should use the underlying Avro type. If a logical type is - invalid, for example a decimal with scale greater than its precision, - then implementations should ignore the logical type and use the - underlying Avro type.

- -
- Decimal -

The decimal logical type represents an arbitrary-precision signed - decimal number of the form unscaled × 10-scale.

- -

A decimal logical type annotates Avro - bytes or fixed types. The byte array must - contain the two's-complement representation of the unscaled integer - value in big-endian byte order. The scale is fixed, and is specified - using an attribute.

- -

The following attributes are supported:

-
    -
  • scale, a JSON integer representing the scale - (optional). If not specified the scale is 0.
  • -
  • precision, a JSON integer representing the (maximum) - precision of decimals stored in this type (required).
  • -
- -

For example, the following schema represents decimal numbers with a - maximum precision of 4 and a scale of 2:

- -{ - "type": "bytes", - "logicalType": "decimal", - "precision": 4, - "scale": 2 -} - - -

Precision must be a positive integer greater than zero. If the - underlying type is a fixed, then the precision is - limited by its size. An array of length n can store at - most floor(log_10(28 × n - 1 - 1)) - base-10 digits of precision.

- -

Scale must be zero or a positive integer less than or equal to the - precision.

- -

For the purposes of schema resolution, two schemas that are - decimal logical types match if their scales and - precisions match.

- -
- -
- UUID -

- The uuid logical type represents a random generated universally unique identifier (UUID). -

- -

- A uuid logical type annotates an Avro string. The string has to conform with RFC-4122 -

-
- -
- Date -

- The date logical type represents a date within the calendar, with no reference to a particular time zone or time of day. -

-

- A date logical type annotates an Avro int, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar). -

-

The following schema represents a date:

- -{ - "type": "int", - "logicalType": "date" -} - -
- -
- Time (millisecond precision) -

- The time-millis logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. -

-

- A time-millis logical type annotates an Avro int, where the int stores the number of milliseconds after midnight, 00:00:00.000. -

-
- -
- Time (microsecond precision) -

- The time-micros logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond. -

-

- A time-micros logical type annotates an Avro long, where the long stores the number of microseconds after midnight, 00:00:00.000000. -

-
- -
- Timestamp (millisecond precision) -

- The timestamp-millis logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond. - Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. - In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. -

-

- A timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC. -

-
- -
- Timestamp (microsecond precision) -

- The timestamp-micros logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one microsecond. - Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. - In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. -

-

- A timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC. -

-
- -
- Local timestamp (millisecond precision) -

- The local-timestamp-millis logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one millisecond. -

-

- A local-timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. -

-
- -
- Local timestamp (microsecond precision) -

- The local-timestamp-micros logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one microsecond. -

-

- A local-timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds, from 1 January 1970 00:00:00.000000. -

-
- -
- Duration -

- The duration logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. -

-

- A duration logical type annotates Avro fixed type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds. -

-
- -
- -

Apache Avro, Avro, Apache, and the Avro and Apache logos are - trademarks of The Apache Software Foundation.

- - -
diff --git a/doc/src/content/xdocs/tabs.xml b/doc/src/content/xdocs/tabs.xml deleted file mode 100644 index 97e7d2c3813..00000000000 --- a/doc/src/content/xdocs/tabs.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - %avro-entities; -]> - - - - - - - - - - diff --git a/doc/src/resources/images/apache_feather.gif b/doc/src/resources/images/apache_feather.gif deleted file mode 100644 index 1a0c3e6b6d1..00000000000 Binary files a/doc/src/resources/images/apache_feather.gif and /dev/null differ diff --git a/doc/src/resources/images/avro-logo.png b/doc/src/resources/images/avro-logo.png deleted file mode 100644 index 4cbe12dd97b..00000000000 Binary files a/doc/src/resources/images/avro-logo.png and /dev/null differ diff --git a/doc/src/resources/images/favicon.ico b/doc/src/resources/images/favicon.ico deleted file mode 100644 index 161bcf7841c..00000000000 Binary files a/doc/src/resources/images/favicon.ico and /dev/null differ diff --git a/doc/src/skinconf.xml b/doc/src/skinconf.xml deleted file mode 100644 index 2328639c8ce..00000000000 --- a/doc/src/skinconf.xml +++ /dev/null @@ -1,350 +0,0 @@ - - - - - - - - - - - - true - - false - - true - - true - - - true - - - true - - - true - - - false - - - true - - - Avro - Serialization System - https://avro.apache.org/ - images/avro-logo.png - - - Apache - The Apache Software Foundation - https://www.apache.org/ - images/apache_feather.gif - - - - - - - images/favicon.ico - - - 2012 - The Apache Software Foundation. - https://www.apache.org/licenses/ - - - - - - - - - - - - - - - - - - - p.quote { - margin-left: 2em; - padding: .5em; - background-color: #f0f0f0; - font-family: monospace; - } - table.right { - text-align: right; - display: block; - } - - - - - - - - - - - - - - - - - - - - - - - - - 1in - 1in - 1.25in - 1in - - - - false - - - false - - - - - - Built with Apache Forrest - https://forrest.apache.org/ - images/built-with-forrest-button.png - 88 - 31 - - - - - - diff --git a/lang/c++/CMakeLists.txt b/lang/c++/CMakeLists.txt index 4a3793152e6..28729f243af 100644 --- a/lang/c++/CMakeLists.txt +++ b/lang/c++/CMakeLists.txt @@ -16,17 +16,21 @@ # specific language governing permissions and limitations # under the License. # -cmake_minimum_required (VERSION 3.1) +cmake_minimum_required (VERSION 3.20) set (CMAKE_LEGACY_CYGWIN_WIN32 0) if (NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD 17) +endif() + +if (CMAKE_CXX_STANDARD LESS 17) + message(FATAL_ERROR "Avro requires at least C++17") endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) -if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.0) +if (APPLE) # Enable MACOSX_RPATH by default cmake_policy (SET CMP0042 NEW) endif() @@ -47,52 +51,75 @@ string(REPLACE "." ";" AVRO_VERSION ${AVRO_VERSION}) list(GET AVRO_VERSION 0 AVRO_VERSION_MAJOR) list(GET AVRO_VERSION 1 AVRO_VERSION_MINOR) list(GET AVRO_VERSION 2 AVRO_VERSION_PATCH) +set(AVRO_VERSION "${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}.${AVRO_VERSION_PATCH}") project (Avro-cpp) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}) +option(AVRO_BUILD_EXECUTABLES "Build executables" ON) +option(AVRO_BUILD_TESTS "Build tests" ON) +option(AVRO_BUILD_SHARED "Build shared library" ON) +option(AVRO_BUILD_STATIC "Build static library" ON) +option(AVRO_USE_BOOST "Use Boost" OFF) + +if (NOT AVRO_BUILD_STATIC AND NOT AVRO_BUILD_SHARED) + message (FATAL_ERROR "At least one of AVRO_BUILD_STATIC or AVRO_BUILD_SHARED must be ON.") +endif () + if (WIN32 AND NOT CYGWIN AND NOT MSYS) add_definitions (/EHa) add_definitions ( -DNOMINMAX - -DBOOST_REGEX_DYN_LINK - -DBOOST_FILESYSTEM_DYN_LINK - -DBOOST_SYSTEM_DYN_LINK - -DBOOST_IOSTREAMS_DYN_LINK - -DBOOST_PROGRAM_OPTIONS_DYN_LINK -DBOOST_ALL_NO_LIB) endif() if (CMAKE_COMPILER_IS_GNUCXX) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") -if (AVRO_ADD_PROTECTOR_FLAGS) - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fstack-protector-all -D_GLIBCXX_DEBUG") - # Unset _GLIBCXX_DEBUG for avrogencpp.cc because using Boost Program Options - # leads to linking errors when compiling with _GLIBCXX_DEBUG as described on - # https://stackoverflow.com/questions/19729036/ - set_source_files_properties(impl/avrogencpp.cc PROPERTIES COMPILE_FLAGS "-U_GLIBCXX_DEBUG") -endif () + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wduplicated-cond -Wduplicated-branches -Wlogical-op -Wuseless-cast -Wconversion -pedantic -Werror") endif () +if (AVRO_BUILD_TESTS OR AVRO_USE_BOOST) + # Boost 1.70 and above provide a BoostConfig.cmake package configuration file. + # See https://cmake.org/cmake/help/latest/policy/CMP0167.html + find_package (Boost 1.70 REQUIRED CONFIG) +endif () -find_package (Boost 1.38 REQUIRED - COMPONENTS filesystem iostreams program_options regex system) +find_package(fmt QUIET) +if (NOT fmt_FOUND) + include(FetchContent) + FetchContent_Declare( + fmt + GIT_REPOSITORY https://github.com/fmtlib/fmt.git + GIT_TAG 10.2.1 + GIT_PROGRESS TRUE + USES_TERMINAL_DOWNLOAD TRUE + ) + FetchContent_MakeAvailable(fmt) +endif () -find_package(Snappy) -if (SNAPPY_FOUND) - set(SNAPPY_PKG libsnappy) +find_package(Snappy CONFIG) +if (Snappy_FOUND) + # Use CONFIG mode to guarantee that Snappy::snappy target exists if found. add_definitions(-DSNAPPY_CODEC_AVAILABLE) - message("Enabled snappy codec") -else (SNAPPY_FOUND) - set(SNAPPY_PKG "") - set(SNAPPY_LIBRARIES "") - set(SNAPPY_INCLUDE_DIR "") - message("Disabled snappy codec. libsnappy not found.") -endif (SNAPPY_FOUND) + message("Enabled snappy codec, version: ${Snappy_VERSION}") +else () + message("Disabled snappy codec.") +endif () + +find_package(zstd CONFIG) +if(zstd_FOUND) + message("Enabled zstd codec, version: ${zstd_VERSION}") + set(ZSTD_TARGET $,zstd::libzstd_shared,zstd::libzstd_static>) + add_definitions(-DZSTD_CODEC_AVAILABLE) +else() + message("Disabled zstd codec.") + set(ZSTD_TARGET "") +endif() -add_definitions (${Boost_LIB_DIAGNOSTIC_DEFINITIONS}) +# FindZLIB guarantees that ZLIB::ZLIB target exists if found +# See https://cmake.org/cmake/help/latest/module/FindZLIB.html#imported-targets +find_package(ZLIB REQUIRED) -include_directories (api ${CMAKE_CURRENT_BINARY_DIR} ${Boost_INCLUDE_DIRS}) +include_directories (include/avro ${CMAKE_CURRENT_BINARY_DIR}) set (AVRO_SOURCE_FILES impl/Compiler.cc impl/Node.cc impl/LogicalType.cc @@ -102,6 +129,8 @@ set (AVRO_SOURCE_FILES impl/Stream.cc impl/FileStream.cc impl/Generic.cc impl/GenericDatum.cc impl/DataFile.cc + impl/ZstdCompressWrapper.cc + impl/ZstdDecompressWrapper.cc impl/parsing/Symbol.cc impl/parsing/ValidatingCodec.cc impl/parsing/JsonCodec.cc @@ -109,106 +138,142 @@ set (AVRO_SOURCE_FILES impl/json/JsonIO.cc impl/json/JsonDom.cc impl/Resolver.cc impl/Validator.cc + impl/CustomAttributes.cc ) -add_library (avrocpp SHARED ${AVRO_SOURCE_FILES}) - -set_property (TARGET avrocpp - APPEND PROPERTY COMPILE_DEFINITIONS AVRO_DYN_LINK) - -add_library (avrocpp_s STATIC ${AVRO_SOURCE_FILES}) -target_include_directories(avrocpp_s PRIVATE ${SNAPPY_INCLUDE_DIR}) - -set_property (TARGET avrocpp avrocpp_s - APPEND PROPERTY COMPILE_DEFINITIONS AVRO_SOURCE) - -set_target_properties (avrocpp PROPERTIES - VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}.${AVRO_VERSION_PATCH}) - -set_target_properties (avrocpp_s PROPERTIES - VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}.${AVRO_VERSION_PATCH}) - -target_link_libraries (avrocpp ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES}) -target_include_directories(avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR}) - -add_executable (precompile test/precompile.cc) - -target_link_libraries (precompile avrocpp_s ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES}) - -macro (gen file ns) - add_custom_command (OUTPUT ${file}.hh - COMMAND avrogencpp - -p - - -i ${CMAKE_CURRENT_SOURCE_DIR}/jsonschemas/${file} - -o ${file}.hh -n ${ns} -U - DEPENDS avrogencpp ${CMAKE_CURRENT_SOURCE_DIR}/jsonschemas/${file}) - add_custom_target (${file}_hh DEPENDS ${file}.hh) -endmacro (gen) - -gen (empty_record empty) -gen (bigrecord testgen) -gen (bigrecord_r testgen_r) -gen (bigrecord2 testgen2) -gen (tweet testgen3) -gen (union_array_union uau) -gen (union_map_union umu) -gen (union_conflict uc) -gen (recursive rec) -gen (reuse ru) -gen (circulardep cd) -gen (tree1 tr1) -gen (tree2 tr2) -gen (crossref cr) -gen (primitivetypes pt) -gen (cpp_reserved_words cppres) - -add_executable (avrogencpp impl/avrogencpp.cc) -target_link_libraries (avrogencpp avrocpp_s ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES}) - -enable_testing() - -macro (unittest name) - add_executable (${name} test/${name}.cc) - target_link_libraries (${name} avrocpp ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES}) - add_test (NAME ${name} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${name}) -endmacro (unittest) - -unittest (buffertest) -unittest (unittest) -unittest (SchemaTests) -unittest (LargeSchemaTests) -unittest (CodecTests) -unittest (StreamTests) -unittest (SpecificTests) -unittest (DataFileTests) -unittest (JsonTests) -unittest (AvrogencppTests) -unittest (CompilerTests) -unittest (AvrogencppTestReservedWords) - -add_dependencies (AvrogencppTestReservedWords cpp_reserved_words_hh) - -add_dependencies (AvrogencppTests bigrecord_hh bigrecord_r_hh bigrecord2_hh - tweet_hh - union_array_union_hh union_map_union_hh union_conflict_hh - recursive_hh reuse_hh circulardep_hh tree1_hh tree2_hh crossref_hh - primitivetypes_hh empty_record_hh) +function (setup_avro_lib target lib_type) + add_library (${target} ${lib_type} ${AVRO_SOURCE_FILES}) + target_compile_definitions (${target} PRIVATE AVRO_SOURCE) + if (lib_type STREQUAL SHARED) + target_compile_definitions (${target} PUBLIC AVRO_DYN_LINK) + endif () + set_target_properties (${target} PROPERTIES VERSION ${AVRO_VERSION}) + target_link_libraries (${target} PUBLIC + $ + $ + $> + $<$:$>> + $ + $> + $<$:$>> + ) + target_include_directories (${target} PUBLIC + $ + $ + ) +endfunction (setup_avro_lib) + +set (AVRO_INSTALL_LIBS) + +if (AVRO_BUILD_SHARED) + setup_avro_lib (avrocpp SHARED) + list (APPEND AVRO_INSTALL_LIBS avrocpp) + set (AVRO_LINK_LIB avrocpp) +endif () + +if (AVRO_BUILD_STATIC) + setup_avro_lib (avrocpp_s STATIC) + list (APPEND AVRO_INSTALL_LIBS avrocpp_s) + # Static takes precedence for linking if both are set. + set (AVRO_LINK_LIB avrocpp_s) +endif () + +if (AVRO_BUILD_EXECUTABLES) + add_executable (precompile test/precompile.cc) + + target_link_libraries (precompile ${AVRO_LINK_LIB}) + + add_executable (avrogencpp impl/avrogencpp.cc) + target_link_libraries (avrogencpp ${AVRO_LINK_LIB}) + target_compile_definitions(avrogencpp PRIVATE AVRO_VERSION="${AVRO_VERSION}") +endif () + +if (AVRO_BUILD_TESTS) + enable_testing() + + macro (gen file ns) + add_custom_command (OUTPUT ${file}.hh + COMMAND avrogencpp + -p - + -i ${CMAKE_CURRENT_SOURCE_DIR}/jsonschemas/${file} + -o ${file}.hh -n ${ns} + DEPENDS avrogencpp ${CMAKE_CURRENT_SOURCE_DIR}/jsonschemas/${file}) + add_custom_target (${file}_hh DEPENDS ${file}.hh) + endmacro (gen) + + gen (empty_record empty) + gen (bigrecord testgen) + gen (bigrecord_r testgen_r) + gen (bigrecord2 testgen2) + gen (tweet testgen3) + gen (union_array_union uau) + gen (union_map_union umu) + gen (union_conflict uc) + gen (union_empty_record uer) + gen (recursive rec) + gen (reuse ru) + gen (circulardep cd) + gen (tree1 tr1) + gen (tree2 tr2) + gen (crossref cr) + gen (primitivetypes pt) + gen (cpp_reserved_words cppres) + gen (cpp_reserved_words_union_typedef cppres_union) + gen (big_union big_union) + gen (union_redundant_types redundant_types) + + macro (unittest name) + add_executable (${name} test/${name}.cc) + target_link_libraries (${name} ${AVRO_LINK_LIB} ZLIB::ZLIB $ $<$:$>) + add_test (NAME ${name} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${name}) + endmacro (unittest) + + unittest (buffertest) + unittest (unittest) + unittest (SchemaTests) + unittest (LargeSchemaTests) + unittest (CodecTests) + unittest (StreamTests) + unittest (SpecificTests) + unittest (DataFileTests) + unittest (JsonTests) + unittest (AvrogencppTests) + unittest (CompilerTests) + unittest (AvrogencppTestReservedWords) + unittest (CommonsSchemasTests) + + add_dependencies (AvrogencppTestReservedWords cpp_reserved_words_hh) + add_dependencies (AvrogencppTestReservedWords cpp_reserved_words_hh + cpp_reserved_words_union_typedef_hh) + + add_dependencies (AvrogencppTests bigrecord_hh bigrecord_r_hh bigrecord2_hh + tweet_hh + union_array_union_hh union_map_union_hh union_conflict_hh + recursive_hh reuse_hh circulardep_hh tree1_hh tree2_hh crossref_hh + primitivetypes_hh empty_record_hh cpp_reserved_words_union_typedef_hh + union_empty_record_hh big_union_hh union_redundant_types_hh) +endif () include (InstallRequiredSystemLibraries) +include (GNUInstallDirs) set (CPACK_PACKAGE_FILE_NAME "avrocpp-${AVRO_VERSION_MAJOR}") include (CPack) -install (TARGETS avrocpp avrocpp_s - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib - RUNTIME DESTINATION lib) +install (TARGETS ${AVRO_INSTALL_LIBS} + EXPORT avrocpp_targets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -install (TARGETS avrogencpp RUNTIME DESTINATION bin) +if (AVRO_BUILD_EXECUTABLES) + install (TARGETS avrogencpp EXPORT avrocpp_targets RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) +endif () -install (DIRECTORY api/ DESTINATION include/avro +install (DIRECTORY include/avro DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN *.hh) if (NOT CMAKE_BUILD_TYPE) @@ -216,3 +281,39 @@ if (NOT CMAKE_BUILD_TYPE) "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE) endif (NOT CMAKE_BUILD_TYPE) + +include(CMakePackageConfigHelpers) + +write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/avro-cpp-config-version.cmake" + VERSION ${AVRO_VERSION} + COMPATIBILITY SameMajorVersion) + +configure_package_config_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/avro-cpp-config.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/avro-cpp-config.cmake" + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/avro-cpp +) + +install(EXPORT avrocpp_targets + NAMESPACE avro-cpp:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/avro-cpp + FILE "avro-cpp-targets.cmake" +) + +install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/avro-cpp-config.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/avro-cpp-config-version.cmake" + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/avro-cpp +) + +cmake_path(APPEND pc_install_includedir "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") +cmake_path(APPEND pc_install_libdir "\${exec_prefix}" "${CMAKE_INSTALL_LIBDIR}") +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/avro-cpp.pc.in" + "${CMAKE_CURRENT_BINARY_DIR}/avro-cpp.pc" + @ONLY +) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/avro-cpp.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig" +) diff --git a/lang/c++/LICENSE b/lang/c++/LICENSE index d641439cded..d7f066e1d81 100644 --- a/lang/c++/LICENSE +++ b/lang/c++/LICENSE @@ -201,36 +201,6 @@ See the License for the specific language governing permissions and limitations under the License. ----------------------------------------------------------------------- -License for the m4 macros used by the C++ implementation: - -Files: -* lang/c++/m4/m4_ax_boost_system.m4 - Copyright (c) 2008 Thomas Porschberg - Copyright (c) 2008 Michael Tindal - Copyright (c) 2008 Daniel Casimiro -* lang/c++/m4/m4_ax_boost_asio.m4 - Copyright (c) 2008 Thomas Porschberg - Copyright (c) 2008 Pete Greenwell -* lang/c++/m4/m4_ax_boost_filesystem.m4 - Copyright (c) 2009 Thomas Porschberg - Copyright (c) 2009 Michael Tindal - Copyright (c) 2009 Roman Rybalko -* lang/c++/m4/m4_ax_boost_thread.m4 - Copyright (c) 2009 Thomas Porschberg - Copyright (c) 2009 Michael Tindal -* lang/c++/m4/m4_ax_boost_regex.m4 - Copyright (c) 2008 Thomas Porschberg - Copyright (c) 2008 Michael Tindal -* lang/c++/m4/m4_ax_boost_base.m4 - Copyright (c) 2008 Thomas Porschberg - -License text: -| Copying and distribution of this file, with or without modification, are -| permitted in any medium without royalty provided the copyright notice -| and this notice are preserved. This file is offered as-is, without any -| warranty. - ---------------------------------------------------------------------- License for the AVRO_BOOT_NO_TRAIT code in the C++ implementation: File: lang/c++/api/Boost.hh diff --git a/lang/c++/MainPage.dox b/lang/c++/MainPage.dox index eab49d3e627..91977fca2f1 100644 --- a/lang/c++/MainPage.dox +++ b/lang/c++/MainPage.dox @@ -55,15 +55,14 @@ One should be able to build Avro C++ on (1) any UNIX flavor including cygwin for In order to build Avro C++, one needs the following:
    -
  • A C++ compiler and runtime libraries. +
  • A C++17 or later compiler and runtime libraries.
  • Boost library version 1.38 or later. Apart from the header-only libraries of Boost, Avro C++ requires filesystem, iostreams, system and program_options libraries. Please see https://www.boost.org or your platform's documentation for details on how to set up Boost for your platform. -
  • CMake build tool version 2.6 or later. Please see https://www.cmake.org or your platform's documentation for details on how to set up CMake for your system. +
  • CMake build tool version 3.5 or later. Please see https://www.cmake.org or your platform's documentation for details on how to set up CMake for your system.
  • Python. If not already present, please consult your platform-specific documentation on how to install Python on your system.
For Ubuntu Linux, for example, you can have these by doing apt-get install for the following packages: -\ul \li cmake \li g++ \li libboost-dev @@ -73,7 +72,6 @@ For Ubuntu Linux, for example, you can have these by doing \li libboost-system-dev For Windows native builds, you need to install the following: -\ul \li cmake \li boost distribution from Boost consulting \li Visual studio @@ -336,4 +334,3 @@ corresponding to a given schema. Please see DataFile.hh for more details. */ - diff --git a/lang/c++/README b/lang/c++/README index 6b081f13a86..be5f2ff62d7 100644 --- a/lang/c++/README +++ b/lang/c++/README @@ -29,9 +29,9 @@ INSTRUCTIONS Pre-requisites: -To compile requires boost headers, and the boost regex library. Optionally, it requires Snappy compression library. If Snappy is available, it builds support for Snappy compression and skips it otherwise. (Please see your OS-specific instructions on how to install Boost and Snappy for your OS). +To compile requires boost headers. Optionally, it requires Snappy compression library. If Snappy is available, it builds support for Snappy compression and skips it otherwise. (Please see your OS-specific instructions on how to install Boost and Snappy for your OS). -To build one requires cmake 2.6 or later. +To build one requires cmake 3.5 or later and a compiler supporting C++17 or later. To generate a Makefile under Unix, MacOS (using GNU) or Cygwin use: @@ -39,8 +39,8 @@ mkdir build cd build cmake -G "Unix Makefiles" .. -If it doesn't work, either you are missing some packages (boost, flex or bison), -or you need to help configure locate them. +If it doesn't work, either you are missing boost package or you need to help +configure locate it. If the Makefile is configured correctly, then you can make and run tests: diff --git a/lang/c++/api/LogicalType.hh b/lang/c++/api/LogicalType.hh deleted file mode 100644 index 4d06e74f635..00000000000 --- a/lang/c++/api/LogicalType.hh +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef avro_LogicalType_hh__ -#define avro_LogicalType_hh__ - -#include - -#include "Config.hh" - -namespace avro { - -class AVRO_DECL LogicalType { -public: - enum Type { - NONE, - DECIMAL, - DATE, - TIME_MILLIS, - TIME_MICROS, - TIMESTAMP_MILLIS, - TIMESTAMP_MICROS, - DURATION, - UUID - }; - - explicit LogicalType(Type type); - - Type type() const; - - // Precision and scale can only be set for the DECIMAL logical type. - // Precision must be positive and scale must be either positive or zero. The - // setters will throw an exception if they are called on any type other - // than DECIMAL. - void setPrecision(int precision); - int precision() const { return precision_; } - void setScale(int scale); - int scale() const { return scale_; } - - void printJson(std::ostream &os) const; - -private: - Type type_; - int precision_; - int scale_; -}; - -} // namespace avro - -#endif diff --git a/lang/c++/build.sh b/lang/c++/build.sh index ac9964c75e5..11e1599d1d9 100755 --- a/lang/c++/build.sh +++ b/lang/c++/build.sh @@ -58,8 +58,8 @@ function do_doc() { function do_dist() { rm -rf $BUILD_CPP/ mkdir -p $BUILD_CPP - cp -r api AUTHORS build.sh CMakeLists.txt ChangeLog \ - LICENSE NOTICE impl jsonschemas NEWS parser README test examples \ + cp -r include AUTHORS build.sh CMakeLists.txt ChangeLog \ + LICENSE NOTICE impl jsonschemas NEWS README test examples \ $BUILD_CPP find $BUILD_CPP -name '.svn' | xargs rm -rf cp ../../share/VERSION.txt $BUILD_CPP @@ -71,10 +71,10 @@ function do_dist() { fi } -(mkdir -p build; cd build; cmake --version; cmake -G "Unix Makefiles" ..) for target in "$@" do +cmake -S . -B build case "$target" in lint) # some versions of cppcheck seem to require an explicit @@ -83,16 +83,20 @@ case "$target" in ;; test) - (cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Debug -D AVRO_ADD_PROTECTOR_FLAGS=1 .. && make && cd .. \ + (cmake -S. -Bbuild -D CMAKE_BUILD_TYPE=Debug -D AVRO_ADD_PROTECTOR_FLAGS=1 && cmake --build build -- -k \ && ./build/buffertest \ && ./build/unittest \ + && ./build/AvrogencppTestReservedWords \ + && ./build/AvrogencppTests \ && ./build/CodecTests \ + && ./build/CommonsSchemasTests \ && ./build/CompilerTests \ - && ./build/StreamTests \ - && ./build/SpecificTests \ - && ./build/AvrogencppTests \ && ./build/DataFileTests \ - && ./build/SchemaTests) + && ./build/JsonTests \ + && ./build/LargeSchemaTests \ + && ./build/SchemaTests \ + && ./build/SpecificTests \ + && ./build/StreamTests) ;; xcode-test) @@ -104,7 +108,7 @@ case "$target" in ;; dist) - (cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release ..) + (cd build && cmake -D CMAKE_BUILD_TYPE=Release ..) do_dist do_doc ;; @@ -118,12 +122,12 @@ case "$target" in ;; clean) - (cd build && make clean) + (cmake --build build --target clean) rm -rf doc test.avro test?.df test??.df test_skip.df test_lastSync.df test_readRecordUsingLastSync.df ;; install) - (cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release .. && make install) + (cmake -S. -Bbuild -D CMAKE_BUILD_TYPE=Release && cmake --build build --target install) ;; *) diff --git a/lang/c++/cmake/avro-cpp-config.cmake.in b/lang/c++/cmake/avro-cpp-config.cmake.in new file mode 100644 index 00000000000..5104843f3a3 --- /dev/null +++ b/lang/c++/cmake/avro-cpp-config.cmake.in @@ -0,0 +1,68 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# This config sets the following variables in your project:: +# +# avro-cpp_FOUND - true if avro-cpp found on the system +# avro-cpp_VERSION - version of the found avro-cpp +# +# This config sets the following targets in your project:: +# +# avro-cpp::avrocpp_shared +# avro-cpp::avrocpp_static + +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +if(DEFINED CMAKE_MODULE_PATH) + set(AVRO_CMAKE_MODULE_PATH_OLD ${CMAKE_MODULE_PATH}) +else() + unset(AVRO_CMAKE_MODULE_PATH_OLD) +endif() +set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}") + +find_dependency(ZLIB REQUIRED) +find_dependency(fmt REQUIRED) +if(@Snappy_FOUND@) + find_dependency(Snappy REQUIRED) +endif() +if(@Boost_FOUND@) + find_dependency(Boost 1.70 REQUIRED) +endif() +if(@zstd_FOUND@) + find_dependency(zstd REQUIRED) +endif() + +if(DEFINED AVRO_CMAKE_MODULE_PATH_OLD) + set(CMAKE_MODULE_PATH ${AVRO_CMAKE_MODULE_PATH_OLD}) + unset(AVRO_CMAKE_MODULE_PATH_OLD) +else() + unset(CMAKE_MODULE_PATH) +endif() + +include("${CMAKE_CURRENT_LIST_DIR}/avro-cpp-targets.cmake") + +if(@AVRO_BUILD_STATIC@) + add_library(avro-cpp::avrocpp_static ALIAS avro-cpp::avrocpp_s) +endif() +if(@AVRO_BUILD_SHARED@) + add_library(avro-cpp::avrocpp_shared ALIAS avro-cpp::avrocpp) +endif() + +check_required_components(avro-cpp) diff --git a/doc/forrest.properties b/lang/c++/cmake/avro-cpp.pc.in similarity index 71% rename from doc/forrest.properties rename to lang/c++/cmake/avro-cpp.pc.in index 32df46f523d..e37d602f369 100644 --- a/doc/forrest.properties +++ b/lang/c++/cmake/avro-cpp.pc.in @@ -16,7 +16,17 @@ # specific language governing permissions and limitations # under the License. # +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +includedir=@pc_install_includedir@ +libdir=@pc_install_libdir@ -# Make Forrest work with Java6 -forrest.validate.sitemap=false +Name: avro-cpp +Description: C++ library for parsing Avro data +URL: https://avro.apache.org/ +License: Apache-2.0 +Version: @AVRO_VERSION@ +Cflags: -I${includedir} +Libs: -L${libdir} -lavrocpp +Requires: fmt diff --git a/lang/c++/examples/cpx.hh b/lang/c++/examples/cpx.hh index 53c6ee130db..e240abb568a 100644 --- a/lang/c++/examples/cpx.hh +++ b/lang/c++/examples/cpx.hh @@ -16,14 +16,12 @@ * limitations under the License. */ - #ifndef CPX_HH_1278398428__H_ #define CPX_HH_1278398428__H_ - -#include "avro/Specific.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" +#include "avro/Encoder.hh" +#include "avro/Specific.hh" namespace c { struct cpx { @@ -31,18 +29,19 @@ struct cpx { double im; }; -} +} // namespace c namespace avro { -template<> struct codec_traits { - static void encode(Encoder& e, const c::cpx& v) { +template<> +struct codec_traits { + static void encode(Encoder &e, const c::cpx &v) { avro::encode(e, v.re); avro::encode(e, v.im); } - static void decode(Decoder& d, c::cpx& v) { + static void decode(Decoder &d, c::cpx &v) { avro::decode(d, v.re); avro::decode(d, v.im); } }; -} +} // namespace avro #endif diff --git a/lang/c++/examples/custom.cc b/lang/c++/examples/custom.cc index e3aa32da8c4..baf6d0cf796 100644 --- a/lang/c++/examples/custom.cc +++ b/lang/c++/examples/custom.cc @@ -18,19 +18,19 @@ #include -#include "avro/Encoder.hh" #include "avro/Decoder.hh" +#include "avro/Encoder.hh" #include "avro/Specific.hh" namespace avro { template -struct codec_traits > { - static void encode(Encoder& e, const std::complex& c) { +struct codec_traits> { + static void encode(Encoder &e, const std::complex &c) { avro::encode(e, std::real(c)); avro::encode(e, std::imag(c)); } - static void decode(Decoder& d, std::complex& c) { + static void decode(Decoder &d, std::complex &c) { T re, im; avro::decode(d, re); avro::decode(d, im); @@ -38,10 +38,8 @@ struct codec_traits > { } }; -} -int -main() -{ +} // namespace avro +int main() { std::unique_ptr out = avro::memoryOutputStream(); avro::EncoderPtr e = avro::binaryEncoder(); e->init(*out); diff --git a/lang/c++/examples/datafile.cc b/lang/c++/examples/datafile.cc index 2875e8fc738..6942a3074fa 100644 --- a/lang/c++/examples/datafile.cc +++ b/lang/c++/examples/datafile.cc @@ -18,25 +18,21 @@ #include -#include "cpx.hh" -#include "avro/Encoder.hh" -#include "avro/Decoder.hh" -#include "avro/ValidSchema.hh" #include "avro/Compiler.hh" #include "avro/DataFile.hh" +#include "avro/Decoder.hh" +#include "avro/Encoder.hh" +#include "avro/ValidSchema.hh" +#include "cpx.hh" - -avro::ValidSchema loadSchema(const char* filename) -{ +avro::ValidSchema loadSchema(const char *filename) { std::ifstream ifs(filename); avro::ValidSchema result; avro::compileJsonSchema(ifs, result); return result; } -int -main() -{ +int main() { avro::ValidSchema cpxSchema = loadSchema("cpx.json"); { @@ -59,4 +55,3 @@ main() } return 0; } - diff --git a/lang/c++/examples/generated.cc b/lang/c++/examples/generated.cc index f1394ee602a..42a8dd923f1 100644 --- a/lang/c++/examples/generated.cc +++ b/lang/c++/examples/generated.cc @@ -16,14 +16,11 @@ * limitations under the License. */ -#include "cpx.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" +#include "avro/Encoder.hh" +#include "cpx.hh" - -int -main() -{ +int main() { std::unique_ptr out = avro::memoryOutputStream(); avro::EncoderPtr e = avro::binaryEncoder(); e->init(*out); @@ -41,4 +38,3 @@ main() std::cout << '(' << c2.re << ", " << c2.im << ')' << std::endl; return 0; } - diff --git a/lang/c++/examples/generic.cc b/lang/c++/examples/generic.cc index 2675564e8ba..3abd37eccbc 100644 --- a/lang/c++/examples/generic.cc +++ b/lang/c++/examples/generic.cc @@ -16,20 +16,18 @@ * limitations under the License. */ -#include #include +#include #include "cpx.hh" #include "avro/Compiler.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" -#include "avro/Specific.hh" +#include "avro/Encoder.hh" #include "avro/Generic.hh" +#include "avro/Specific.hh" -int -main() -{ +int main() { std::ifstream ifs("cpx.json"); avro::ValidSchema cpxSchema; @@ -51,14 +49,14 @@ main() avro::decode(*d, datum); std::cout << "Type: " << datum.type() << std::endl; if (datum.type() == avro::AVRO_RECORD) { - const avro::GenericRecord& r = datum.value(); + const avro::GenericRecord &r = datum.value(); std::cout << "Field-count: " << r.fieldCount() << std::endl; if (r.fieldCount() == 2) { - const avro::GenericDatum& f0 = r.fieldAt(0); + const avro::GenericDatum &f0 = r.fieldAt(0); if (f0.type() == avro::AVRO_DOUBLE) { std::cout << "Real: " << f0.value() << std::endl; } - const avro::GenericDatum& f1 = r.fieldAt(1); + const avro::GenericDatum &f1 = r.fieldAt(1); if (f1.type() == avro::AVRO_DOUBLE) { std::cout << "Imaginary: " << f1.value() << std::endl; } diff --git a/lang/c++/examples/imaginary.hh b/lang/c++/examples/imaginary.hh index 774aefd1172..e483c0beb4a 100644 --- a/lang/c++/examples/imaginary.hh +++ b/lang/c++/examples/imaginary.hh @@ -16,31 +16,29 @@ * limitations under the License. */ - #ifndef IMAGINARY_HH_3460301992__H_ #define IMAGINARY_HH_3460301992__H_ - -#include "boost/any.hpp" -#include "avro/Specific.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" +#include "avro/Encoder.hh" +#include "avro/Specific.hh" namespace i { struct cpx { double im; }; -} +} // namespace i namespace avro { -template<> struct codec_traits { - static void encode(Encoder& e, const i::cpx& v) { +template<> +struct codec_traits { + static void encode(Encoder &e, const i::cpx &v) { avro::encode(e, v.im); } - static void decode(Decoder& d, i::cpx& v) { + static void decode(Decoder &d, i::cpx &v) { avro::decode(d, v.im); } }; -} +} // namespace avro #endif diff --git a/lang/c++/examples/resolving.cc b/lang/c++/examples/resolving.cc index 8ce9addb894..a3aec70bc05 100644 --- a/lang/c++/examples/resolving.cc +++ b/lang/c++/examples/resolving.cc @@ -22,24 +22,19 @@ #include "imaginary.hh" #include "avro/Compiler.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" -#include "avro/Specific.hh" +#include "avro/Encoder.hh" #include "avro/Generic.hh" +#include "avro/Specific.hh" - - -avro::ValidSchema load(const char* filename) -{ +avro::ValidSchema load(const char *filename) { std::ifstream ifs(filename); avro::ValidSchema result; avro::compileJsonSchema(ifs, result); return result; } -int -main() -{ +int main() { avro::ValidSchema cpxSchema = load("cpx.json"); avro::ValidSchema imaginarySchema = load("imaginary.json"); @@ -53,11 +48,10 @@ main() std::unique_ptr in = avro::memoryInputStream(*out); avro::DecoderPtr d = avro::resolvingDecoder(cpxSchema, imaginarySchema, - avro::binaryDecoder()); + avro::binaryDecoder()); d->init(*in); i::cpx c2; avro::decode(*d, c2); std::cout << "Imaginary: " << c2.im << std::endl; - } diff --git a/lang/c++/examples/schemaload.cc b/lang/c++/examples/schemaload.cc index d6b442dd960..63375af9a54 100644 --- a/lang/c++/examples/schemaload.cc +++ b/lang/c++/examples/schemaload.cc @@ -18,13 +18,10 @@ #include -#include "avro/ValidSchema.hh" #include "avro/Compiler.hh" +#include "avro/ValidSchema.hh" - -int -main() -{ +int main() { std::ifstream in("cpx.json"); avro::ValidSchema cpxSchema; diff --git a/lang/c++/examples/validating.cc b/lang/c++/examples/validating.cc index 64f0649fa5d..5479edeb3d4 100644 --- a/lang/c++/examples/validating.cc +++ b/lang/c++/examples/validating.cc @@ -16,23 +16,23 @@ * limitations under the License. */ -#include #include +#include #include "avro/Compiler.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" +#include "avro/Encoder.hh" #include "avro/Specific.hh" namespace avro { template -struct codec_traits > { - static void encode(Encoder& e, const std::complex& c) { +struct codec_traits> { + static void encode(Encoder &e, const std::complex &c) { avro::encode(e, std::real(c)); avro::encode(e, std::imag(c)); } - static void decode(Decoder& d, std::complex& c) { + static void decode(Decoder &d, std::complex &c) { T re, im; avro::decode(d, re); avro::decode(d, im); @@ -40,10 +40,8 @@ struct codec_traits > { } }; -} -int -main() -{ +} // namespace avro +int main() { std::ifstream ifs("cpx.json"); avro::ValidSchema cpxSchema; @@ -51,14 +49,14 @@ main() std::unique_ptr out = avro::memoryOutputStream(); avro::EncoderPtr e = avro::validatingEncoder(cpxSchema, - avro::binaryEncoder()); + avro::binaryEncoder()); e->init(*out); std::complex c1(1.0, 2.0); avro::encode(*e, c1); std::unique_ptr in = avro::memoryInputStream(*out); avro::DecoderPtr d = avro::validatingDecoder(cpxSchema, - avro::binaryDecoder()); + avro::binaryDecoder()); d->init(*in); std::complex c2; diff --git a/lang/c++/impl/BinaryDecoder.cc b/lang/c++/impl/BinaryDecoder.cc index 248b503342a..b334de7cf5d 100644 --- a/lang/c++/impl/BinaryDecoder.cc +++ b/lang/c++/impl/BinaryDecoder.cc @@ -74,14 +74,13 @@ bool BinaryDecoder::decodeBool() { } else if (v == 1) { return true; } - throw Exception(boost::format("Invalid value for bool: %1%") % v); + throw Exception("Invalid value for bool: {}", v); } int32_t BinaryDecoder::decodeInt() { auto val = doDecodeLong(); if (val < INT32_MIN || val > INT32_MAX) { - throw Exception( - boost::format("Value out of range for Avro int: %1%") % val); + throw Exception("Value out of range for Avro int: {}", val); } return static_cast(val); } @@ -105,8 +104,7 @@ double BinaryDecoder::decodeDouble() { size_t BinaryDecoder::doDecodeLength() { ssize_t len = decodeInt(); if (len < 0) { - throw Exception( - boost::format("Cannot have negative length: %1%") % len); + throw Exception("Cannot have negative length: {}", len); } return len; } @@ -166,13 +164,13 @@ size_t BinaryDecoder::doDecodeItemCount() { auto result = doDecodeLong(); if (result < 0) { doDecodeLong(); - return static_cast(-result); + return static_cast(-(result + 1)) + 1; } return static_cast(result); } size_t BinaryDecoder::arrayNext() { - return static_cast(doDecodeLong()); + return doDecodeItemCount(); } size_t BinaryDecoder::skipArray() { diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc index d76546f317d..d0ac20d0204 100644 --- a/lang/c++/impl/Compiler.cc +++ b/lang/c++/impl/Compiler.cc @@ -15,11 +15,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include + #include +#include #include #include "Compiler.hh" +#include "CustomAttributes.hh" +#include "NodeConcepts.hh" #include "Schema.hh" #include "Stream.hh" #include "Types.hh" @@ -91,9 +94,17 @@ static NodePtr makeNode(const string &t, SymbolTable &st, const string &ns) { auto it = st.find(n); if (it != st.end()) { - return NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second)); + // Return the raw NodePtr instead of creating a new "NodeSymbolic" + // via "NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second))" + // in order to support externally resolved named references. + // This is safe because the validator canonicalizes duplicates: + // when it sees the same named node again (including self-recursion), + // it replaces that leaf with a NodeSymbolic via "setLeafToSymbolic". + // So even if the raw NodePtr is returned initially, validation + // converts repeats to symbolic links. + return it->second; } - throw Exception(boost::format("Unknown type: %1%") % n.fullname()); + throw Exception("Unknown type: {}", n); } /** Returns "true" if the field is in the container */ @@ -109,7 +120,7 @@ json::Object::const_iterator findField(const Entity &e, template void ensureType(const Entity &e, const string &name) { if (e.type() != json::type_traits::type()) { - throw Exception(boost::format("Json field \"%1%\" is not a %2%: %3%") % name % json::type_traits::name() % e.toString()); + throw Exception("Json field \"{}\" is not a {}: {}", name, json::type_traits::name(), e.toString()); } } @@ -133,7 +144,21 @@ int64_t getLongField(const Entity &e, const Object &m, // Unescape double quotes (") for de-serialization. This method complements the // method NodeImpl::escape() which is used for serialization. static void unescape(string &s) { - boost::replace_all(s, "\\\"", "\""); + size_t writePos = 0, readPos = 0; + while (readPos < s.length()) { + if (readPos + 1 < s.length() && s[readPos] == '\\' && s[readPos + 1] == '\"') { + s[writePos++] = '\"'; + readPos += 2; + } else if (writePos != readPos) { + s[writePos++] = s[readPos++]; + } else { + writePos++; + readPos++; + } + } + if (writePos != s.length()) { + s.resize(writePos); + } } string getDocField(const Entity &e, const Object &m) { @@ -144,16 +169,20 @@ string getDocField(const Entity &e, const Object &m) { struct Field { const string name; + const vector aliases; const NodePtr schema; const GenericDatum defaultValue; - Field(string n, NodePtr v, GenericDatum dv) : name(std::move(n)), schema(std::move(v)), defaultValue(std::move(dv)) {} + const CustomAttributes customAttributes; + + Field(string n, vector a, NodePtr v, GenericDatum dv, const CustomAttributes &ca) + : name(std::move(n)), aliases(std::move(a)), schema(std::move(v)), defaultValue(std::move(dv)), customAttributes(ca) {} }; static void assertType(const Entity &e, EntityType et) { if (e.type() != et) { - throw Exception(boost::format("Unexpected type for default value: " - "Expected %1%, but found %2% in line %3%") - % json::typeToString(et) % json::typeToString(e.type()) % e.line()); + throw Exception( + "Unexpected type for default value: Expected {}, but found {} in line {}", + json::typeToString(et), json::typeToString(e.type()), e.line()); } } @@ -212,9 +241,9 @@ static GenericDatum makeGenericDatum(NodePtr n, for (size_t i = 0; i < n->leaves(); ++i) { auto it = v.find(n->nameAt(i)); if (it == v.end()) { - throw Exception(boost::format( - "No value found in default for %1%") - % n->nameAt(i)); + throw Exception( + "No value found in default for {}", + n->nameAt(i)); } result.setFieldAt(i, makeGenericDatum(n->leafAt(i), it->second, st)); @@ -252,13 +281,40 @@ static GenericDatum makeGenericDatum(NodePtr n, case AVRO_FIXED: assertType(e, json::EntityType::String); return GenericDatum(n, GenericFixed(n, toBin(e.bytesValue()))); - default: throw Exception(boost::format("Unknown type: %1%") % t); + default: throw Exception("Unknown type: {}", t); + } +} + +static const std::unordered_set &getKnownFields() { + // return known fields + static const std::unordered_set kKnownFields = + {"name", "type", "aliases", "default", "doc", "size", "logicalType", + "values", "precision", "scale", "namespace", "items", "symbols"}; + return kKnownFields; +} + +static void getCustomAttributes(const Object &m, CustomAttributes &customAttributes) { + // Don't add known fields on primitive type and fixed type into custom + // fields. + const std::unordered_set &kKnownFields = getKnownFields(); + for (const auto &entry : m) { + if (kKnownFields.find(entry.first) == kKnownFields.end()) { + bool addQuotes = entry.second.type() == json::EntityType::String; + customAttributes.addAttribute(entry.first, entry.second.toLiteralString(), addQuotes); + } } } static Field makeField(const Entity &e, SymbolTable &st, const string &ns) { const Object &m = e.objectValue(); - const string &n = getStringField(e, m, "name"); + string n = getStringField(e, m, "name"); + vector aliases; + string aliasesName = "aliases"; + if (containsField(m, aliasesName)) { + for (const auto &alias : getArrayField(e, m, aliasesName)) { + aliases.emplace_back(alias.stringValue()); + } + } auto it = findField(e, m, "type"); auto it2 = m.find("default"); NodePtr node = makeNode(it->second, st, ns); @@ -266,31 +322,38 @@ static Field makeField(const Entity &e, SymbolTable &st, const string &ns) { node->setDoc(getDocField(e, m)); } GenericDatum d = (it2 == m.end()) ? GenericDatum() : makeGenericDatum(node, it2->second, st); - return Field(n, node, d); + // Get custom attributes + CustomAttributes customAttributes; + getCustomAttributes(m, customAttributes); + return Field(std::move(n), std::move(aliases), node, d, customAttributes); } // Extended makeRecordNode (with doc). static NodePtr makeRecordNode(const Entity &e, const Name &name, const string *doc, const Object &m, SymbolTable &st, const string &ns) { - const Array &v = getArrayField(e, m, "fields"); concepts::MultiAttribute fieldNames; + vector> fieldAliases; concepts::MultiAttribute fieldValues; + concepts::MultiAttribute customAttributes; vector defaultValues; - - for (const auto &it : v) { + string fields = "fields"; + for (const auto &it : getArrayField(e, m, fields)) { Field f = makeField(it, st, ns); fieldNames.add(f.name); + fieldAliases.push_back(f.aliases); fieldValues.add(f.schema); defaultValues.push_back(f.defaultValue); + customAttributes.add(f.customAttributes); } + NodeRecord *node; if (doc == nullptr) { node = new NodeRecord(asSingleAttribute(name), fieldValues, fieldNames, - defaultValues); + fieldAliases, defaultValues, customAttributes); } else { node = new NodeRecord(asSingleAttribute(name), asSingleAttribute(*doc), - fieldValues, fieldNames, defaultValues); + fieldValues, fieldNames, fieldAliases, defaultValues, customAttributes); } return NodePtr(node); } @@ -305,11 +368,12 @@ static LogicalType makeLogicalType(const Entity &e, const Object &m) { if (typeField == "decimal") { LogicalType decimalType(LogicalType::DECIMAL); try { - decimalType.setPrecision(getLongField(e, m, "precision")); + // Precision probably won't go over 38 and scale beyond -77/+77 + decimalType.setPrecision(static_cast(getLongField(e, m, "precision"))); if (containsField(m, "scale")) { - decimalType.setScale(getLongField(e, m, "scale")); + decimalType.setScale(static_cast(getLongField(e, m, "scale"))); } - } catch (Exception &ex) { + } catch (const Exception &) { // If any part of the logical type is malformed, per the standard we // must ignore the whole attribute. return LogicalType(LogicalType::NONE); @@ -318,7 +382,11 @@ static LogicalType makeLogicalType(const Entity &e, const Object &m) { } LogicalType::Type t = LogicalType::NONE; - if (typeField == "date") + if (typeField == "big-decimal" + && !containsField(m, "precision") + && !containsField(m, "scale")) + t = LogicalType::BIG_DECIMAL; + else if (typeField == "date") t = LogicalType::DATE; else if (typeField == "time-millis") t = LogicalType::TIME_MILLIS; @@ -328,20 +396,35 @@ static LogicalType makeLogicalType(const Entity &e, const Object &m) { t = LogicalType::TIMESTAMP_MILLIS; else if (typeField == "timestamp-micros") t = LogicalType::TIMESTAMP_MICROS; + else if (typeField == "timestamp-nanos") + t = LogicalType::TIMESTAMP_NANOS; + else if (typeField == "local-timestamp-millis") + t = LogicalType::LOCAL_TIMESTAMP_MILLIS; + else if (typeField == "local-timestamp-micros") + t = LogicalType::LOCAL_TIMESTAMP_MICROS; + else if (typeField == "local-timestamp-nanos") + t = LogicalType::LOCAL_TIMESTAMP_NANOS; else if (typeField == "duration") t = LogicalType::DURATION; else if (typeField == "uuid") t = LogicalType::UUID; + else { + auto custom = CustomLogicalTypeRegistry::instance().create(typeField, e.toString()); + if (custom != nullptr) { + return LogicalType(std::move(custom)); + } + } return LogicalType(t); } static NodePtr makeEnumNode(const Entity &e, const Name &name, const Object &m) { - const Array &v = getArrayField(e, m, "symbols"); + string symbolsName = "symbols"; + const Array &v = getArrayField(e, m, symbolsName); concepts::MultiAttribute symbols; for (const auto &it : v) { if (it.type() != json::EntityType::String) { - throw Exception(boost::format("Enum symbol not a string: %1%") % it.toString()); + throw Exception("Enum symbol not a string: {}", it.toString()); } symbols.add(it.stringValue()); } @@ -349,20 +432,30 @@ static NodePtr makeEnumNode(const Entity &e, if (containsField(m, "doc")) { node->setDoc(getDocField(e, m)); } + + CustomAttributes customAttributes; + getCustomAttributes(m, customAttributes); + node->addCustomAttributesForField(customAttributes); + return node; } static NodePtr makeFixedNode(const Entity &e, const Name &name, const Object &m) { - int v = static_cast(getLongField(e, m, "size")); + int64_t v = getLongField(e, m, "size"); if (v <= 0) { - throw Exception(boost::format("Size for fixed is not positive: %1%") % e.toString()); + throw Exception("Size for fixed is not positive: {}", e.toString()); } NodePtr node = - NodePtr(new NodeFixed(asSingleAttribute(name), asSingleAttribute(v))); + NodePtr(new NodeFixed(asSingleAttribute(name), asSingleAttribute(static_cast(v)))); if (containsField(m, "doc")) { node->setDoc(getDocField(e, m)); } + + CustomAttributes customAttributes; + getCustomAttributes(m, customAttributes); + node->addCustomAttributesForField(customAttributes); + return node; } @@ -374,6 +467,9 @@ static NodePtr makeArrayNode(const Entity &e, const Object &m, if (containsField(m, "doc")) { node->setDoc(getDocField(e, m)); } + CustomAttributes customAttributes; + getCustomAttributes(m, customAttributes); + node->addCustomAttributesForField(customAttributes); return node; } @@ -386,27 +482,42 @@ static NodePtr makeMapNode(const Entity &e, const Object &m, if (containsField(m, "doc")) { node->setDoc(getDocField(e, m)); } + + CustomAttributes customAttributes; + getCustomAttributes(m, customAttributes); + node->addCustomAttributesForField(customAttributes); + return node; } static Name getName(const Entity &e, const Object &m, const string &ns) { const string &name = getStringField(e, m, "name"); + Name result; if (isFullName(name)) { - return Name(name); + result = Name(name); } else { auto it = m.find("namespace"); if (it != m.end()) { if (it->second.type() != json::type_traits::type()) { - throw Exception(boost::format( - "Json field \"%1%\" is not a %2%: %3%") - % "namespace" % json::type_traits::name() % it->second.toString()); + throw Exception( + "Json field \"namespace\" is not a string: {}", + it->second.toString()); } - Name result = Name(name, it->second.stringValue()); - return result; + result = Name(name, it->second.stringValue()); + } else { + result = Name(name, ns); } - return Name(name, ns); } + + std::string aliases = "aliases"; + if (containsField(m, aliases)) { + for (const auto &alias : getArrayField(e, m, aliases)) { + result.addAlias(alias.stringValue()); + } + } + + return result; } static NodePtr makeNode(const Entity &e, const Object &m, @@ -445,18 +556,17 @@ static NodePtr makeNode(const Entity &e, const Object &m, if (result) { try { result->setLogicalType(makeLogicalType(e, m)); - } catch (Exception &ex) { + } catch (const Exception &) { // Per the standard we must ignore the logical type attribute if it // is malformed. } return result; } - throw Exception(boost::format("Unknown type definition: %1%") - % e.toString()); + throw Exception("Unknown type definition: %1%", e.toString()); } -static NodePtr makeNode(const Entity &e, const Array &m, +static NodePtr makeNode(const Entity &, const Array &m, SymbolTable &st, const string &ns) { concepts::MultiAttribute mm; for (const auto &it : m) { @@ -470,13 +580,13 @@ static NodePtr makeNode(const json::Entity &e, SymbolTable &st, const string &ns case json::EntityType::String: return makeNode(e.stringValue(), st, ns); case json::EntityType::Obj: return makeNode(e, e.objectValue(), st, ns); case json::EntityType::Arr: return makeNode(e, e.arrayValue(), st, ns); - default: throw Exception(boost::format("Invalid Avro type: %1%") % e.toString()); + default: throw Exception("Invalid Avro type: {}", e.toString()); } } json::Object::const_iterator findField(const Entity &e, const Object &m, const string &fieldName) { auto it = m.find(fieldName); if (it == m.end()) { - throw Exception(boost::format("Missing Json field \"%1%\": %2%") % fieldName % e.toString()); + throw Exception("Missing Json field \"{}\": {}", fieldName, e.toString()); } else { return it; } @@ -536,4 +646,23 @@ AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema, string & } } +AVRO_DECL ValidSchema compileJsonSchemaWithNamedReferences(std::istream &is, + const std::map &namedReferences) { + if (!is.good()) { + throw Exception("Input stream is not good"); + } + + std::unique_ptr in = istreamInputStream(is); + json::Entity e = json::loadEntity(*in); + + // Convert the map to SymbolTable (map) + SymbolTable st; + for (const auto &entry : namedReferences) { + st[entry.first] = entry.second.root(); + } + + NodePtr n = makeNode(e, st, ""); + return ValidSchema(n); +} + } // namespace avro diff --git a/lang/c++/impl/CustomAttributes.cc b/lang/c++/impl/CustomAttributes.cc new file mode 100644 index 00000000000..4c139ba5aca --- /dev/null +++ b/lang/c++/impl/CustomAttributes.cc @@ -0,0 +1,62 @@ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "CustomAttributes.hh" +#include "Exception.hh" +#include +#include + +namespace avro { + +std::optional CustomAttributes::getAttribute(const std::string &name) const { + std::optional result; + std::map::const_iterator iter = + attributes_.find(name); + if (iter == attributes_.end()) { + return result; + } + result = iter->second; + return result; +} + +void CustomAttributes::addAttribute(const std::string &name, + const std::string &value, + bool addQuotes) { + auto iter_and_find = + attributes_.insert(std::pair(name, value)); + if (!iter_and_find.second) { + throw Exception(name + " already exists and cannot be added"); + } + if (addQuotes) { + keysNeedQuotes_.insert(name); + } +} + +void CustomAttributes::printJson(std::ostream &os, + const std::string &name) const { + auto iter = attributes_.find(name); + if (iter == attributes_.cend()) { + throw Exception(name + " doesn't exist"); + } + if (keysNeedQuotes_.find(name) != keysNeedQuotes_.cend()) { + os << "\"" << name << "\": \"" << iter->second << "\""; + } else { + os << "\"" << name << "\": " << iter->second; + } +} +} // namespace avro diff --git a/lang/c++/impl/DataFile.cc b/lang/c++/impl/DataFile.cc index 18fb3f61a68..3a93a5fdf53 100644 --- a/lang/c++/impl/DataFile.cc +++ b/lang/c++/impl/DataFile.cc @@ -20,18 +20,20 @@ #include "Compiler.hh" #include "Exception.hh" +#include #include -#include // for boost::crc_32_type -#include -#include -#include -#include - #ifdef SNAPPY_CODEC_AVAILABLE #include #endif +#ifdef ZSTD_CODEC_AVAILABLE +#include "ZstdCompressWrapper.hh" +#include "ZstdDecompressWrapper.hh" +#endif + +#include + namespace avro { using std::copy; using std::istringstream; @@ -45,71 +47,195 @@ using std::array; namespace { const string AVRO_SCHEMA_KEY("avro.schema"); const string AVRO_CODEC_KEY("avro.codec"); -const string AVRO_NULL_CODEC("null"); -const string AVRO_DEFLATE_CODEC("deflate"); +const size_t minSyncInterval = 32; +const size_t maxSyncInterval = 1u << 30; + +// Recommended by https://www.zlib.net/zlib_how.html +const size_t zlibBufGrowSize = 128 * 1024; + +template +struct codec_trait { + static std::string name() { + throw Exception("Unsupported codec: {}", static_cast(codec)); + } + static void validate(std::optional level) { + throw Exception("Unsupported codec: {}", static_cast(codec)); + } + static bool available() { + throw Exception("Unsupported codec: {}", static_cast(codec)); + } +}; + +template<> +struct codec_trait { + static std::string name() { + return "null"; + } + static void validate(std::optional /*level*/) {} + static bool available() { + return true; + } +}; + +template<> +struct codec_trait { + static std::string name() { + return "deflate"; + } + + static void validate(std::optional level) { + if (!level.has_value()) { + return; + } + int levelValue = level.value(); + if (levelValue < 0 || levelValue > 9) { + throw Exception("Invalid compression level {} for deflate codec. " + "Valid range is 0-9.", + levelValue); + } + } + + static bool available() { + return true; + } +}; + +template<> +struct codec_trait { + static std::string name() { + return "snappy"; + } + + static void validate(std::optional /*level*/) { + } + + static bool available() { #ifdef SNAPPY_CODEC_AVAILABLE -const string AVRO_SNAPPY_CODEC = "snappy"; + return true; +#else + return false; #endif + } +}; -const size_t minSyncInterval = 32; -const size_t maxSyncInterval = 1u << 30; +template<> +struct codec_trait { + static std::string name() { + return "zstandard"; + } + + static void validate(std::optional level) { + if (!level.has_value()) { + return; + } + int levelValue = level.value(); + if (levelValue < 1 || levelValue > 22) { + throw Exception("Invalid compression level {} for zstandard codec. " + "Valid range is 1-22.", + levelValue); + } + } + + static bool available() { +#ifdef ZSTD_CODEC_AVAILABLE + return true; +#else + return false; +#endif + } +}; + +#define DISPATCH_CODEC_FUNC(codec, func, ...) \ + switch (codec) { \ + case NULL_CODEC: \ + return codec_trait::func(__VA_ARGS__); \ + case DEFLATE_CODEC: \ + return codec_trait::func(__VA_ARGS__); \ + case SNAPPY_CODEC: \ + return codec_trait::func(__VA_ARGS__); \ + case ZSTD_CODEC: \ + return codec_trait::func(__VA_ARGS__); \ + default: \ + throw Exception("Unknown codec: {}", static_cast(codec)); \ + } -boost::iostreams::zlib_params get_zlib_params() { - boost::iostreams::zlib_params ret; - ret.method = boost::iostreams::zlib::deflated; - ret.noheader = true; - return ret; +std::string getCodecName(Codec codec) { + DISPATCH_CODEC_FUNC(codec, name); } + +void validateCodec(Codec codec, std::optional level) { + if (!isCodecAvailable(codec)) { + throw Exception("Codec {} is not available.", getCodecName(codec)); + } + DISPATCH_CODEC_FUNC(codec, validate, level); +} + +Codec getCodec(const std::string &name) { + if (name == codec_trait::name()) { + return NULL_CODEC; + } else if (name == codec_trait::name()) { + return DEFLATE_CODEC; + } else if (name == codec_trait::name()) { + return SNAPPY_CODEC; + } else if (name == codec_trait::name()) { + return ZSTD_CODEC; + } else { + throw Exception("Unknown codec name: {}", name); + } +} + } // namespace +bool isCodecAvailable(Codec codec) { + DISPATCH_CODEC_FUNC(codec, available); +} + +#undef DISPATCH_CODEC_FUNC + DataFileWriterBase::DataFileWriterBase(const char *filename, const ValidSchema &schema, size_t syncInterval, - Codec codec) : filename_(filename), - schema_(schema), - encoderPtr_(binaryEncoder()), - syncInterval_(syncInterval), - codec_(codec), - stream_(fileOutputStream(filename)), - buffer_(memoryOutputStream()), - sync_(makeSync()), - objectCount_(0), - lastSync_(0) { + Codec codec, const Metadata &metadata, + std::optional compressionLevel) : filename_(filename), + schema_(schema), + encoderPtr_(binaryEncoder()), + syncInterval_(syncInterval), + codec_(codec), + compressionLevel_(compressionLevel), + stream_(fileOutputStream(filename)), + buffer_(memoryOutputStream()), + sync_(makeSync()), + objectCount_(0), + metadata_(metadata), + lastSync_(0) { init(schema, syncInterval, codec); } -DataFileWriterBase::DataFileWriterBase(std::unique_ptr outputStream, - const ValidSchema &schema, size_t syncInterval, Codec codec) : filename_(), - schema_(schema), - encoderPtr_(binaryEncoder()), - syncInterval_(syncInterval), - codec_(codec), - stream_(std::move(outputStream)), - buffer_(memoryOutputStream()), - sync_(makeSync()), - objectCount_(0), - lastSync_(0) { +DataFileWriterBase::DataFileWriterBase(std::unique_ptr outputStream, const ValidSchema &schema, + size_t syncInterval, Codec codec, const Metadata &metadata, + std::optional compressionLevel) : filename_(), + schema_(schema), + encoderPtr_(binaryEncoder()), + syncInterval_(syncInterval), + codec_(codec), + compressionLevel_(compressionLevel), + stream_(std::move(outputStream)), + buffer_(memoryOutputStream()), + sync_(makeSync()), + objectCount_(0), + metadata_(metadata), + lastSync_(0) { init(schema, syncInterval, codec); } void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, const Codec &codec) { if (syncInterval < minSyncInterval || syncInterval > maxSyncInterval) { - throw Exception(boost::format("Invalid sync interval: %1%. " - "Should be between %2% and %3%") - % syncInterval % minSyncInterval % maxSyncInterval); + throw Exception( + "Invalid sync interval: {}. Should be between {} and {}", + syncInterval, minSyncInterval, maxSyncInterval); } - setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC); - if (codec_ == NULL_CODEC) { - setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC); - } else if (codec_ == DEFLATE_CODEC) { - setMetadata(AVRO_CODEC_KEY, AVRO_DEFLATE_CODEC); -#ifdef SNAPPY_CODEC_AVAILABLE - } else if (codec_ == SNAPPY_CODEC) { - setMetadata(AVRO_CODEC_KEY, AVRO_SNAPPY_CODEC); -#endif - } else { - throw Exception(boost::format("Unknown codec: %1%") % codec); - } + validateCodec(codec, compressionLevel_); + setMetadata(AVRO_CODEC_KEY, getCodecName(codec)); setMetadata(AVRO_SCHEMA_KEY, schema.toJson(false)); writeHeader(); @@ -120,7 +246,9 @@ void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, co DataFileWriterBase::~DataFileWriterBase() { if (stream_) { - close(); + try { + close(); + } catch (...) {} } } @@ -141,21 +269,48 @@ void DataFileWriterBase::sync() { std::unique_ptr in = memoryInputStream(*buffer_); copy(*in, *stream_); } else if (codec_ == DEFLATE_CODEC) { - std::vector buf; + std::vector buf; { - boost::iostreams::filtering_ostream os; - os.push(boost::iostreams::zlib_compressor(get_zlib_params())); - os.push(boost::iostreams::back_inserter(buf)); - const uint8_t *data; - size_t len; + z_stream zs; + zs.zalloc = Z_NULL; + zs.zfree = Z_NULL; + zs.opaque = Z_NULL; + + // Use Z_DEFAULT_COMPRESSION if no level specified + int effectiveLevel = compressionLevel_.value_or(Z_DEFAULT_COMPRESSION); + + int ret = deflateInit2(&zs, effectiveLevel, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) { + throw Exception("Failed to initialize deflate, error: {}", ret); + } std::unique_ptr input = memoryInputStream(*buffer_); - while (input->next(&data, &len)) { - boost::iostreams::write(os, reinterpret_cast(data), len); + const uint8_t *data; + size_t len; + while (ret != Z_STREAM_END && input->next(&data, &len)) { + zs.avail_in = static_cast(len); + zs.next_in = const_cast(data); + bool flush = (zs.total_in + len) >= buffer_->byteCount(); + do { + if (zs.total_out == buf.size()) { + buf.resize(buf.size() + zlibBufGrowSize); + } + zs.avail_out = static_cast(buf.size() - zs.total_out); + zs.next_out = buf.data() + zs.total_out; + ret = deflate(&zs, flush ? Z_FINISH : Z_NO_FLUSH); + if (ret == Z_STREAM_END) { + break; + } + if (ret != Z_OK) { + throw Exception("Failed to deflate, error: {}", ret); + } + } while (zs.avail_out == 0); } + + buf.resize(zs.total_out); + (void) deflateEnd(&zs); } // make sure all is flushed - std::unique_ptr in = memoryInputStream( - reinterpret_cast(buf.data()), buf.size()); + std::unique_ptr in = memoryInputStream(buf.data(), buf.size()); int64_t byteCount = buf.size(); avro::encode(*encoderPtr_, byteCount); encoderPtr_->flush(); @@ -164,45 +319,59 @@ void DataFileWriterBase::sync() { } else if (codec_ == SNAPPY_CODEC) { std::vector temp; std::string compressed; - boost::crc_32_type crc; - { - boost::iostreams::filtering_ostream os; - os.push(boost::iostreams::back_inserter(temp)); - const uint8_t *data; - size_t len; - std::unique_ptr input = memoryInputStream(*buffer_); - while (input->next(&data, &len)) { - boost::iostreams::write(os, reinterpret_cast(data), - len); - } - } // make sure all is flushed + const uint8_t *data; + size_t len; + std::unique_ptr input = memoryInputStream(*buffer_); + while (input->next(&data, &len)) { + temp.insert(temp.end(), reinterpret_cast(data), + reinterpret_cast(data) + len); + } - crc.process_bytes(reinterpret_cast(temp.data()), - temp.size()); // For Snappy, add the CRC32 checksum - int32_t checksum = crc(); + auto checksum = crc32(0, reinterpret_cast(temp.data()), + static_cast(temp.size())); // Now compress size_t compressed_size = snappy::Compress( reinterpret_cast(temp.data()), temp.size(), &compressed); + temp.clear(); - { - boost::iostreams::filtering_ostream os; - os.push(boost::iostreams::back_inserter(temp)); - boost::iostreams::write(os, compressed.c_str(), compressed_size); - } - temp.push_back((checksum >> 24) & 0xFF); - temp.push_back((checksum >> 16) & 0xFF); - temp.push_back((checksum >> 8) & 0xFF); - temp.push_back(checksum & 0xFF); + temp.insert(temp.end(), compressed.c_str(), + compressed.c_str() + compressed_size); + + temp.push_back(static_cast((checksum >> 24) & 0xFF)); + temp.push_back(static_cast((checksum >> 16) & 0xFF)); + temp.push_back(static_cast((checksum >> 8) & 0xFF)); + temp.push_back(static_cast(checksum & 0xFF)); std::unique_ptr in = memoryInputStream( reinterpret_cast(temp.data()), temp.size()); int64_t byteCount = temp.size(); avro::encode(*encoderPtr_, byteCount); encoderPtr_->flush(); copy(*in, *stream_); +#endif +#ifdef ZSTD_CODEC_AVAILABLE + } else if (codec_ == ZSTD_CODEC) { + // Read all uncompressed data into a single buffer + std::vector uncompressed; + const uint8_t *data; + size_t len; + std::unique_ptr input = memoryInputStream(*buffer_); + while (input->next(&data, &len)) { + uncompressed.insert(uncompressed.end(), reinterpret_cast(data), + reinterpret_cast(data) + len); + } + + ZstdCompressWrapper zstdCompressWrapper; + std::vector compressed = zstdCompressWrapper.compress(uncompressed, compressionLevel_); + + std::unique_ptr in = memoryInputStream( + reinterpret_cast(compressed.data()), compressed.size()); + avro::encode(*encoderPtr_, static_cast(compressed.size())); + encoderPtr_->flush(); + copy(*in, *stream_); #endif } @@ -232,9 +401,8 @@ void DataFileWriterBase::flush() { sync(); } -boost::mt19937 random(static_cast(time(nullptr))); - DataFileSync DataFileWriterBase::makeSync() { + std::mt19937 random(static_cast(time(nullptr))); DataFileSync sync; std::generate(sync.begin(), sync.end(), random); return sync; @@ -257,14 +425,14 @@ void DataFileWriterBase::setMetadata(const string &key, const string &value) { metadata_[key] = v; } -DataFileReaderBase::DataFileReaderBase(const char *filename) : filename_(filename), codec_(NULL_CODEC), stream_(fileSeekableInputStream(filename)), - decoder_(binaryDecoder()), objectCount_(0), eof_(false), blockStart_(-1), - blockEnd_(-1) { +DataFileReaderBase::DataFileReaderBase(const char *filename) : filename_(filename), stream_(fileSeekableInputStream(filename)), + decoder_(binaryDecoder()), objectCount_(0), eof_(false), + codec_(NULL_CODEC), blockStart_(-1), blockEnd_(-1) { readHeader(); } -DataFileReaderBase::DataFileReaderBase(std::unique_ptr inputStream) : codec_(NULL_CODEC), stream_(std::move(inputStream)), - decoder_(binaryDecoder()), objectCount_(0), eof_(false) { +DataFileReaderBase::DataFileReaderBase(std::unique_ptr inputStream) : stream_(std::move(inputStream)), + decoder_(binaryDecoder()), objectCount_(0), eof_(false), codec_(NULL_CODEC) { readHeader(); } @@ -283,8 +451,7 @@ void DataFileReaderBase::init(const ValidSchema &readerSchema) { static void drain(InputStream &in) { const uint8_t *p = nullptr; size_t n = 0; - while (in.next(&p, &n)) - ; + while (in.next(&p, &n)); } char hex(unsigned int x) { @@ -382,7 +549,6 @@ void DataFileReaderBase::readDataBlock() { dataStream_ = std::move(st); #ifdef SNAPPY_CODEC_AVAILABLE } else if (codec_ == SNAPPY_CODEC) { - boost::crc_32_type crc; uint32_t checksum = 0; compressed_.clear(); uncompressed.clear(); @@ -392,6 +558,9 @@ void DataFileReaderBase::readDataBlock() { compressed_.insert(compressed_.end(), data, data + len); } len = compressed_.size(); + if (len < 4) + throw Exception("Cannot read compressed data, expected at least 4 bytes, got " + std::to_string(len)); + int b1 = compressed_[len - 4] & 0xFF; int b2 = compressed_[len - 3] & 0xFF; int b3 = compressed_[len - 2] & 0xFF; @@ -403,41 +572,98 @@ void DataFileReaderBase::readDataBlock() { throw Exception( "Snappy Compression reported an error when decompressing"); } - crc.process_bytes(uncompressed.c_str(), uncompressed.size()); - uint32_t c = crc(); + auto c = crc32(0, reinterpret_cast(uncompressed.c_str()), + static_cast(uncompressed.size())); if (checksum != c) { throw Exception( - boost::format("Checksum did not match for Snappy compression: Expected: %1%, computed: %2%") % checksum - % c); + "Checksum did not match for Snappy compression: Expected: {}, computed: {}", + checksum, c); } - os_.reset(new boost::iostreams::filtering_istream()); - os_->push( - boost::iostreams::basic_array_source(uncompressed.c_str(), - uncompressed.size())); - std::unique_ptr in = istreamInputStream(*os_); + + std::unique_ptr in = memoryInputStream( + reinterpret_cast(uncompressed.c_str()), + uncompressed.size()); dataDecoder_->init(*in); dataStream_ = std::move(in); #endif - } else { +#ifdef ZSTD_CODEC_AVAILABLE + } else if (codec_ == ZSTD_CODEC) { compressed_.clear(); + uncompressed.clear(); const uint8_t *data; size_t len; while (st->next(&data, &len)) { compressed_.insert(compressed_.end(), data, data + len); } - os_.reset(new boost::iostreams::filtering_istream()); - os_->push(boost::iostreams::zlib_decompressor(get_zlib_params())); - os_->push(boost::iostreams::basic_array_source( - compressed_.data(), compressed_.size())); - std::unique_ptr in = nonSeekableIstreamInputStream(*os_); + ZstdDecompressWrapper zstdDecompressWrapper; + uncompressed = zstdDecompressWrapper.decompress(compressed_); + + std::unique_ptr in = memoryInputStream( + reinterpret_cast(uncompressed.data()), + uncompressed.size()); + + dataDecoder_->init(*in); + dataStream_ = std::move(in); +#endif + } else { + compressed_.clear(); + uncompressed.clear(); + + { + z_stream zs; + zs.zalloc = Z_NULL; + zs.zfree = Z_NULL; + zs.opaque = Z_NULL; + zs.avail_in = 0; + zs.next_in = Z_NULL; + + int ret = inflateInit2(&zs, /*windowBits=*/-15); + if (ret != Z_OK) { + throw Exception("Failed to initialize inflate, error: {}", ret); + } + + const uint8_t *data; + size_t len; + while (ret != Z_STREAM_END && st->next(&data, &len)) { + zs.avail_in = static_cast(len); + zs.next_in = const_cast(data); + do { + if (zs.total_out == uncompressed.size()) { + uncompressed.resize(uncompressed.size() + zlibBufGrowSize); + } + zs.avail_out = static_cast(uncompressed.size() - zs.total_out); + zs.next_out = reinterpret_cast(uncompressed.data() + zs.total_out); + ret = inflate(&zs, Z_NO_FLUSH); + if (ret == Z_STREAM_END) { + break; + } + if (ret != Z_OK) { + throw Exception("Failed to inflate, error: {}", ret); + } + } while (zs.avail_out == 0); + } + + uncompressed.resize(zs.total_out); + (void) inflateEnd(&zs); + } + + std::unique_ptr in = memoryInputStream( + reinterpret_cast(uncompressed.c_str()), + uncompressed.size()); + dataDecoder_->init(*in); dataStream_ = std::move(in); } } void DataFileReaderBase::close() { + stream_.reset(); + eof_ = true; + objectCount_ = 0; + blockStart_ = 0; + blockEnd_ = 0; } static string toString(const vector &v) { @@ -451,7 +677,7 @@ static ValidSchema makeSchema(const vector &v) { istringstream iss(toString(v)); ValidSchema vs; compileJsonSchema(iss, vs); - return ValidSchema(vs); + return vs; } void DataFileReaderBase::readHeader() { @@ -473,19 +699,16 @@ void DataFileReaderBase::readHeader() { readerSchema_ = dataSchema(); } + // Parse codec from metadata using codec_trait it = metadata_.find(AVRO_CODEC_KEY); - if (it != metadata_.end() && toString(it->second) == AVRO_DEFLATE_CODEC) { - codec_ = DEFLATE_CODEC; -#ifdef SNAPPY_CODEC_AVAILABLE - } else if (it != metadata_.end() - && toString(it->second) == AVRO_SNAPPY_CODEC) { - codec_ = SNAPPY_CODEC; -#endif + if (it != metadata_.end()) { + const auto codecName = toString(it->second); + codec_ = getCodec(codecName); + if (!isCodecAvailable(codec_)) { + throw Exception("Codec {} is not available.", codecName); + } } else { codec_ = NULL_CODEC; - if (it != metadata_.end() && toString(it->second) != AVRO_NULL_CODEC) { - throw Exception("Unknown codec in data file: " + toString(it->second)); - } } avro::decode(*decoder_, sync_); @@ -523,8 +746,7 @@ void DataFileReaderBase::sync(int64_t position) { eof_ = true; return; } - int len = - std::min(static_cast(SyncSize - i), n); + size_t len = std::min(SyncSize - i, n); memcpy(&sync_buffer[i], p, len); p += len; n -= len; diff --git a/lang/c++/impl/FileStream.cc b/lang/c++/impl/FileStream.cc index 749fd835abd..9063cf1f734 100644 --- a/lang/c++/impl/FileStream.cc +++ b/lang/c++/impl/FileStream.cc @@ -49,9 +49,9 @@ struct BufferCopyIn { struct FileBufferCopyIn : public BufferCopyIn { #ifdef _WIN32 HANDLE h_; - FileBufferCopyIn(const char *filename) : h_(::CreateFileA(filename, GENERIC_READ, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) { + explicit FileBufferCopyIn(const char *filename) : h_(::CreateFileA(filename, GENERIC_READ, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) { if (h_ == INVALID_HANDLE_VALUE) { - throw Exception(boost::format("Cannot open file: %1%") % ::GetLastError()); + throw Exception("Cannot open file: {}", ::GetLastError()); } } @@ -59,16 +59,16 @@ struct FileBufferCopyIn : public BufferCopyIn { ::CloseHandle(h_); } - void seek(size_t len) { + void seek(size_t len) override { if (::SetFilePointer(h_, len, NULL, FILE_CURRENT) == INVALID_SET_FILE_POINTER && ::GetLastError() != NO_ERROR) { - throw Exception(boost::format("Cannot skip file: %1%") % ::GetLastError()); + throw Exception("Cannot skip file: {}", ::GetLastError()); } } - bool read(uint8_t *b, size_t toRead, size_t &actual) { + bool read(uint8_t *b, size_t toRead, size_t &actual) override { DWORD dw = 0; if (!::ReadFile(h_, b, toRead, &dw, NULL)) { - throw Exception(boost::format("Cannot read file: %1%") % ::GetLastError()); + throw Exception("Cannot read file: {}", ::GetLastError()); } actual = static_cast(dw); return actual != 0; @@ -78,7 +78,7 @@ struct FileBufferCopyIn : public BufferCopyIn { explicit FileBufferCopyIn(const char *filename) : fd_(open(filename, O_RDONLY | O_BINARY)) { if (fd_ < 0) { - throw Exception(boost::format("Cannot open file: %1%") % ::strerror(errno)); + throw Exception("Cannot open file: {}", strerror(errno)); } } @@ -89,12 +89,12 @@ struct FileBufferCopyIn : public BufferCopyIn { void seek(size_t len) final { off_t r = ::lseek(fd_, len, SEEK_CUR); if (r == static_cast(-1)) { - throw Exception(boost::format("Cannot skip file: %1%") % strerror(errno)); + throw Exception("Cannot skip file: {}", strerror(errno)); } } bool read(uint8_t *b, size_t toRead, size_t &actual) final { - int n = ::read(fd_, b, toRead); + auto n = ::read(fd_, b, toRead); if (n > 0) { actual = n; return true; @@ -232,9 +232,9 @@ struct BufferCopyOut { struct FileBufferCopyOut : public BufferCopyOut { #ifdef _WIN32 HANDLE h_; - FileBufferCopyOut(const char *filename) : h_(::CreateFileA(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) { + explicit FileBufferCopyOut(const char *filename) : h_(::CreateFileA(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) { if (h_ == INVALID_HANDLE_VALUE) { - throw Exception(boost::format("Cannot open file: %1%") % ::GetLastError()); + throw Exception("Cannot open file: {}", ::GetLastError()); } } @@ -242,11 +242,11 @@ struct FileBufferCopyOut : public BufferCopyOut { ::CloseHandle(h_); } - void write(const uint8_t *b, size_t len) { + void write(const uint8_t *b, size_t len) override { while (len > 0) { DWORD dw = 0; if (!::WriteFile(h_, b, len, &dw, NULL)) { - throw Exception(boost::format("Cannot read file: %1%") % ::GetLastError()); + throw Exception("Cannot read file: {}", ::GetLastError()); } b += dw; len -= dw; @@ -258,7 +258,7 @@ struct FileBufferCopyOut : public BufferCopyOut { explicit FileBufferCopyOut(const char *filename) : fd_(::open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644)) { if (fd_ < 0) { - throw Exception(boost::format("Cannot open file: %1%") % ::strerror(errno)); + throw Exception("Cannot open file: {}", ::strerror(errno)); } } @@ -268,7 +268,7 @@ struct FileBufferCopyOut : public BufferCopyOut { void write(const uint8_t *b, size_t len) final { if (::write(fd_, b, len) < 0) { - throw Exception(boost::format("Cannot write file: %1%") % ::strerror(errno)); + throw Exception("Cannot write file: {}", ::strerror(errno)); } } #endif diff --git a/lang/c++/impl/Generic.cc b/lang/c++/impl/Generic.cc index 6e0436ae344..1535c604be7 100644 --- a/lang/c++/impl/Generic.cc +++ b/lang/c++/impl/Generic.cc @@ -29,7 +29,7 @@ typedef vector bytes; void GenericContainer::assertType(const NodePtr &schema, Type type) { if (schema->type() != type) { - throw Exception(boost::format("Schema type %1 expected %2") % toString(schema->type()) % toString(type)); + throw Exception("Schema type {} expected {}", schema->type(), type); } } @@ -129,7 +129,7 @@ void GenericReader::read(GenericDatum &datum, Decoder &d, bool isResolving) { } } break; default: - throw Exception(boost::format("Unknown schema type %1%") % toString(datum.type())); + throw Exception("Unknown schema type {}", datum.type()); } } @@ -217,7 +217,7 @@ void GenericWriter::write(const GenericDatum &datum, Encoder &e) { e.mapEnd(); } break; default: - throw Exception(boost::format("Unknown schema type %1%") % toString(datum.type())); + throw Exception("Unknown schema type {}", datum.type()); } } diff --git a/lang/c++/impl/GenericDatum.cc b/lang/c++/impl/GenericDatum.cc index 7b2bf93bca9..49700a927f5 100644 --- a/lang/c++/impl/GenericDatum.cc +++ b/lang/c++/impl/GenericDatum.cc @@ -83,7 +83,7 @@ void GenericDatum::init(const NodePtr &schema) { value_ = GenericUnion(sc); break; default: - throw Exception(boost::format("Unknown schema type %1%") % toString(type_)); + throw Exception("Unknown schema type {}", toString(type_)); } } diff --git a/lang/c++/impl/LogicalType.cc b/lang/c++/impl/LogicalType.cc index 1aa24bf20de..f59c500a3a9 100644 --- a/lang/c++/impl/LogicalType.cc +++ b/lang/c++/impl/LogicalType.cc @@ -22,28 +22,35 @@ namespace avro { LogicalType::LogicalType(Type type) - : type_(type), precision_(0), scale_(0) {} + : type_(type), precision_(0), scale_(0), custom_(nullptr) { + if (type == CUSTOM) { + throw Exception("Logical type CUSTOM must be initialized with a custom logical type"); + } +} + +LogicalType::LogicalType(std::shared_ptr custom) + : type_(CUSTOM), precision_(0), scale_(0), custom_(std::move(custom)) {} LogicalType::Type LogicalType::type() const { return type_; } -void LogicalType::setPrecision(int precision) { +void LogicalType::setPrecision(int32_t precision) { if (type_ != DECIMAL) { throw Exception("Only logical type DECIMAL can have precision"); } if (precision <= 0) { - throw Exception(boost::format("Precision cannot be: %1%") % precision); + throw Exception("Precision cannot be: {}", precision); } precision_ = precision; } -void LogicalType::setScale(int scale) { +void LogicalType::setScale(int32_t scale) { if (type_ != DECIMAL) { throw Exception("Only logical type DECIMAL can have scale"); } if (scale < 0) { - throw Exception(boost::format("Scale cannot be: %1%") % scale); + throw Exception("Scale cannot be: {}", scale); } scale_ = scale; } @@ -51,6 +58,9 @@ void LogicalType::setScale(int scale) { void LogicalType::printJson(std::ostream &os) const { switch (type_) { case LogicalType::NONE: break; + case LogicalType::BIG_DECIMAL: + os << R"("logicalType": "big-decimal")"; + break; case LogicalType::DECIMAL: os << R"("logicalType": "decimal")"; os << ", \"precision\": " << precision_; @@ -71,13 +81,51 @@ void LogicalType::printJson(std::ostream &os) const { case TIMESTAMP_MICROS: os << R"("logicalType": "timestamp-micros")"; break; + case TIMESTAMP_NANOS: + os << R"("logicalType": "timestamp-nanos")"; + break; + case LOCAL_TIMESTAMP_MILLIS: + os << R"("logicalType": "local-timestamp-millis")"; + break; + case LOCAL_TIMESTAMP_MICROS: + os << R"("logicalType": "local-timestamp-micros")"; + break; + case LOCAL_TIMESTAMP_NANOS: + os << R"("logicalType": "local-timestamp-nanos")"; + break; case DURATION: os << R"("logicalType": "duration")"; break; case UUID: os << R"("logicalType": "uuid")"; break; + case CUSTOM: + custom_->printJson(os); + break; + } +} + +void CustomLogicalType::printJson(std::ostream &os) const { + os << R"("logicalType": ")" << name_ << "\""; +} + +CustomLogicalTypeRegistry &CustomLogicalTypeRegistry::instance() { + static CustomLogicalTypeRegistry instance; + return instance; +} + +void CustomLogicalTypeRegistry::registerType(const std::string &name, Factory factory) { + std::lock_guard lock(mutex_); + registry_[name] = factory; +} + +std::shared_ptr CustomLogicalTypeRegistry::create(const std::string &name, const std::string &json) const { + std::lock_guard lock(mutex_); + auto it = registry_.find(name); + if (it == registry_.end()) { + return nullptr; } + return it->second(json); } } // namespace avro diff --git a/lang/c++/impl/Node.cc b/lang/c++/impl/Node.cc index 46310d0f9ef..0a5fcfc1b0b 100644 --- a/lang/c++/impl/Node.cc +++ b/lang/c++/impl/Node.cc @@ -16,7 +16,9 @@ * limitations under the License. */ +#include #include +#include #include "Node.hh" @@ -26,12 +28,44 @@ using std::string; Node::~Node() = default; +struct Name::Aliases { + std::vector raw; + std::unordered_set fullyQualified; +}; + +Name::Name() = default; + Name::Name(const std::string &name) { fullname(name); } +Name::Name(std::string simpleName, std::string ns) : ns_(std::move(ns)), simpleName_(std::move(simpleName)) { + check(); +} + +Name::Name(const Name &other) { + *this = other; +} + +Name &Name::operator=(const Name &other) { + if (this != &other) { + ns_ = other.ns_; + simpleName_ = other.simpleName_; + if (other.aliases_) { + aliases_ = std::make_unique(*other.aliases_); + } + } + return *this; +} + +Name::Name(Name &&other) = default; + +Name &Name::operator=(Name &&other) = default; + +Name::~Name() = default; + string Name::fullname() const { - return (ns_.empty()) ? simpleName_ : ns_ + "." + simpleName_; + return ns_.empty() ? simpleName_ : ns_ + "." + simpleName_; } void Name::fullname(const string &name) { @@ -46,6 +80,23 @@ void Name::fullname(const string &name) { check(); } +const std::vector &Name::aliases() const { + static const std::vector emptyAliases; + return aliases_ ? aliases_->raw : emptyAliases; +} + +void Name::addAlias(const std::string &alias) { + if (!aliases_) { + aliases_ = std::make_unique(); + } + aliases_->raw.push_back(alias); + if (!ns_.empty() && alias.find_last_of('.') == string::npos) { + aliases_->fullyQualified.emplace(ns_ + "." + alias); + } else { + aliases_->fullyQualified.insert(alias); + } +} + bool Name::operator<(const Name &n) const { return (ns_ < n.ns_) || (!(n.ns_ < ns_) && (simpleName_ < n.simpleName_)); } @@ -72,12 +123,29 @@ bool Name::operator==(const Name &n) const { return ns_ == n.ns_ && simpleName_ == n.simpleName_; } +bool Name::equalOrAliasedBy(const Name &n) const { + return *this == n || (n.aliases_ && n.aliases_->fullyQualified.find(fullname()) != n.aliases_->fullyQualified.end()); +} + +void Name::clear() { + ns_.clear(); + simpleName_.clear(); + aliases_.reset(); +} + void Node::setLogicalType(LogicalType logicalType) { checkLock(); // Check that the logical type is applicable to the node type. switch (logicalType.type()) { case LogicalType::NONE: break; + case LogicalType::BIG_DECIMAL: { + if (type_ != AVRO_BYTES) { + throw Exception("BIG_DECIMAL logical type can annotate " + "only BYTES type"); + } + break; + } case LogicalType::DECIMAL: { if (type_ != AVRO_BYTES && type_ != AVRO_FIXED) { throw Exception("DECIMAL logical type can annotate " @@ -86,14 +154,13 @@ void Node::setLogicalType(LogicalType logicalType) { if (type_ == AVRO_FIXED) { // Max precision that can be supported by the current size of // the FIXED type. - long maxPrecision = floor(log10(2.0) * (8.0 * fixedSize() - 1)); + auto maxPrecision = static_cast(floor(log10(2.0) * (8.0 * static_cast(fixedSize()) - 1))); if (logicalType.precision() > maxPrecision) { throw Exception( - boost::format( - "DECIMAL precision %1% is too large for the " - "FIXED type of size %2%, precision cannot be " - "larger than %3%") - % logicalType.precision() % fixedSize() % maxPrecision); + "DECIMAL precision {} is too large for the " + "FIXED type of size {}, precision cannot be " + "larger than {}", + logicalType.precision(), fixedSize(), maxPrecision); } } if (logicalType.scale() > logicalType.precision()) { @@ -130,6 +197,30 @@ void Node::setLogicalType(LogicalType logicalType) { "LONG type"); } break; + case LogicalType::TIMESTAMP_NANOS: + if (type_ != AVRO_LONG) { + throw Exception("TIMESTAMP-NANOS logical type can only annotate " + "LONG type"); + } + break; + case LogicalType::LOCAL_TIMESTAMP_MILLIS: + if (type_ != AVRO_LONG) { + throw Exception("LOCAL-TIMESTAMP-MILLIS logical type can only annotate " + "LONG type"); + } + break; + case LogicalType::LOCAL_TIMESTAMP_MICROS: + if (type_ != AVRO_LONG) { + throw Exception("LOCAL-TIMESTAMP-MICROS logical type can only annotate " + "LONG type"); + } + break; + case LogicalType::LOCAL_TIMESTAMP_NANOS: + if (type_ != AVRO_LONG) { + throw Exception("LOCAL-TIMESTAMP-NANOS logical type can only annotate " + "LONG type"); + } + break; case LogicalType::DURATION: if (type_ != AVRO_FIXED || fixedSize() != 12) { throw Exception("DURATION logical type can only annotate " @@ -137,9 +228,14 @@ void Node::setLogicalType(LogicalType logicalType) { } break; case LogicalType::UUID: - if (type_ != AVRO_STRING) { + if (type_ != AVRO_STRING && (type_ != AVRO_FIXED || fixedSize() != 16)) { throw Exception("UUID logical type can only annotate " - "STRING type"); + "STRING type or FIXED type of length 16"); + } + break; + case LogicalType::CUSTOM: + if (logicalType.customLogicalType() == nullptr) { + throw Exception("CUSTOM logical type is not set"); } break; } diff --git a/lang/c++/impl/NodeImpl.cc b/lang/c++/impl/NodeImpl.cc index 810e1641ed9..e68eb365e2f 100644 --- a/lang/c++/impl/NodeImpl.cc +++ b/lang/c++/impl/NodeImpl.cc @@ -17,8 +17,13 @@ */ #include "NodeImpl.hh" -#include + +#include +#include +#include +#include #include +#include using std::string; namespace avro { @@ -71,7 +76,7 @@ string escape(const string &unescaped) { // Wrap an indentation in a struct for ostream operator<< struct indent { explicit indent(size_t depth) : d(depth) {} - int d; + size_t d; }; /// ostream operator for indent @@ -83,6 +88,18 @@ std::ostream &operator<<(std::ostream &os, indent x) { return os; } +void printCustomAttributes(const CustomAttributes &customAttributes, size_t depth, + std::ostream &os) { + std::map::const_iterator iter = + customAttributes.attributes().begin(); + while (iter != customAttributes.attributes().end()) { + os << ",\n" + << indent(depth); + customAttributes.printJson(os, iter->first); + ++iter; + } +} + } // anonymous namespace const int kByteStringSize = 6; @@ -101,7 +118,7 @@ NodePrimitive::resolve(const Node &reader) const { return RESOLVE_PROMOTABLE_TO_LONG; } - // fall-through intentional + [[fallthrough]]; case AVRO_LONG: @@ -109,7 +126,7 @@ NodePrimitive::resolve(const Node &reader) const { return RESOLVE_PROMOTABLE_TO_FLOAT; } - // fall-through intentional + [[fallthrough]]; case AVRO_FLOAT: @@ -240,20 +257,47 @@ static void printName(std::ostream &os, const Name &n, size_t depth) { os << indent(depth) << R"("name": ")" << n.simpleName() << "\",\n"; } +static void printLogicalType(std::ostream &os, const LogicalType &logicalType, size_t depth) { + if (logicalType.type() != LogicalType::NONE) { + os << indent(depth); + logicalType.printJson(os); + os << ",\n"; + } +} + void NodeRecord::printJson(std::ostream &os, size_t depth) const { os << "{\n"; os << indent(++depth) << "\"type\": \"record\",\n"; - printName(os, nameAttribute_.get(), depth); + printLogicalType(os, logicalType(), depth); + const Name &name = nameAttribute_.get(); + printName(os, name, depth); + + const auto &aliases = name.aliases(); + if (!aliases.empty()) { + os << indent(depth) << "\"aliases\": ["; + ++depth; + for (size_t i = 0; i < aliases.size(); ++i) { + if (i > 0) { + os << ','; + } + os << '\n' + << indent(depth) << "\"" << aliases[i] << "\""; + } + os << '\n' + << indent(--depth) << "]\n"; + } + if (!getDoc().empty()) { os << indent(depth) << R"("doc": ")" << escape(getDoc()) << "\",\n"; } - os << indent(depth) << "\"fields\": ["; + os << indent(depth) << "\"fields\": ["; size_t fields = leafAttributes_.size(); ++depth; - // Serialize "default" field: - assert(defaultValues.empty() || (defaultValues.size() == fields)); + assert(fieldsAliases_.empty() || (fieldsAliases_.size() == fields)); + assert(fieldsDefaultValues_.empty() || (fieldsDefaultValues_.size() == fields)); + assert(customAttributes_.size() == 0 || customAttributes_.size() == fields); for (size_t i = 0; i < fields; ++i) { if (i > 0) { os << ','; @@ -264,16 +308,37 @@ void NodeRecord::printJson(std::ostream &os, size_t depth) const { os << indent(depth) << "\"type\": "; leafAttributes_.get(i)->printJson(os, depth); - if (!defaultValues.empty()) { - if (!defaultValues[i].isUnion() && defaultValues[i].type() == AVRO_NULL) { + if (!fieldsAliases_.empty() && !fieldsAliases_[i].empty()) { + os << ",\n" + << indent(depth) << "\"aliases\": ["; + ++depth; + for (size_t j = 0; j < fieldsAliases_[i].size(); ++j) { + if (j > 0) { + os << ','; + } + os << '\n' + << indent(depth) << "\"" << fieldsAliases_[i][j] << "\""; + } + os << '\n' + << indent(--depth) << ']'; + } + + // Serialize "default" field: + if (!fieldsDefaultValues_.empty()) { + if (!fieldsDefaultValues_[i].isUnion() && fieldsDefaultValues_[i].type() == AVRO_NULL) { // No "default" field. } else { os << ",\n" << indent(depth) << "\"default\": "; - leafAttributes_.get(i)->printDefaultToJson(defaultValues[i], os, + leafAttributes_.get(i)->printDefaultToJson(fieldsDefaultValues_[i], os, depth); } } + + if (customAttributes_.size() == fields) { + printCustomAttributes(customAttributes_.get(i), depth, os); + } + os << '\n'; os << indent(--depth) << '}'; } @@ -283,7 +348,7 @@ void NodeRecord::printJson(std::ostream &os, size_t depth) const { } void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os, - size_t depth) const { + size_t) const { assert(isPrimitive(g.type())); switch (g.type()) { @@ -324,13 +389,13 @@ void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os, } void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os, - size_t depth) const { + size_t) const { assert(g.type() == AVRO_ENUM); os << "\"" << g.value().symbol() << "\""; } void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os, - size_t depth) const { + size_t) const { assert(g.type() == AVRO_FIXED); // ex: "\uOOff" // Convert to a string @@ -409,16 +474,38 @@ void NodeRecord::printDefaultToJson(const GenericDatum &g, std::ostream &os, << indent(--depth) << "}"; } } -NodeRecord::NodeRecord(const HasName &name, - const MultiLeaves &fields, - const LeafNames &fieldsNames, - std::vector dv) : NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, NoSize()), - defaultValues(std::move(dv)) { + +NodeRecord::NodeRecord(const HasName &name, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector dv) + : NodeRecord(name, HasDoc(), fields, fieldsNames, {}, std::move(dv), MultiAttributes()) {} + +NodeRecord::NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector dv) + : NodeRecord(name, doc, fields, fieldsNames, {}, std::move(dv), MultiAttributes()) {} + +NodeRecord::NodeRecord(const HasName &name, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector> fieldsAliases, + std::vector dv, const MultiAttributes &customAttributes) + : NodeRecord(name, HasDoc(), fields, fieldsNames, std::move(fieldsAliases), std::move(dv), customAttributes) {} + +NodeRecord::NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector> fieldsAliases, + std::vector dv, const MultiAttributes &customAttributes) + : NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, customAttributes, NoSize()), + fieldsAliases_(std::move(fieldsAliases)), + fieldsDefaultValues_(std::move(dv)) { + for (size_t i = 0; i < leafNameAttributes_.size(); ++i) { if (!nameIndex_.add(leafNameAttributes_.get(i), i)) { - throw Exception(boost::format( - "Cannot add duplicate field: %1%") - % leafNameAttributes_.get(i)); + throw Exception("Cannot add duplicate field: {}", leafNameAttributes_.get(i)); + } + + if (!fieldsAliases_.empty()) { + for (const auto &alias : fieldsAliases_[i]) { + if (!nameIndex_.add(alias, i)) { + throw Exception("Cannot add duplicate field: {}", alias); + } + } } } } @@ -451,6 +538,7 @@ void NodeMap::printDefaultToJson(const GenericDatum &g, std::ostream &os, void NodeEnum::printJson(std::ostream &os, size_t depth) const { os << "{\n"; os << indent(++depth) << "\"type\": \"enum\",\n"; + printLogicalType(os, logicalType(), depth); if (!getDoc().empty()) { os << indent(depth) << R"("doc": ")" << escape(getDoc()) << "\",\n"; @@ -458,9 +546,9 @@ void NodeEnum::printJson(std::ostream &os, size_t depth) const { printName(os, nameAttribute_.get(), depth); os << indent(depth) << "\"symbols\": [\n"; - int names = leafNameAttributes_.size(); + auto names = leafNameAttributes_.size(); ++depth; - for (int i = 0; i < names; ++i) { + for (size_t i = 0; i < names; ++i) { if (i > 0) { os << ",\n"; } @@ -468,12 +556,16 @@ void NodeEnum::printJson(std::ostream &os, size_t depth) const { } os << '\n'; os << indent(--depth) << "]\n"; + for (size_t i = 0; i != customAttributes_.size(); i++) { + printCustomAttributes(customAttributes_.get(i), depth, os); + } os << indent(--depth) << '}'; } void NodeArray::printJson(std::ostream &os, size_t depth) const { os << "{\n"; os << indent(depth + 1) << "\"type\": \"array\",\n"; + printLogicalType(os, logicalType(), depth + 1); if (!getDoc().empty()) { os << indent(depth + 1) << R"("doc": ")" << escape(getDoc()) << "\",\n"; @@ -481,12 +573,16 @@ void NodeArray::printJson(std::ostream &os, size_t depth) const { os << indent(depth + 1) << "\"items\": "; leafAttributes_.get()->printJson(os, depth + 1); os << '\n'; + for (size_t i = 0; i != customAttributes_.size(); i++) { + printCustomAttributes(customAttributes_.get(i), depth + 1, os); + } os << indent(depth) << '}'; } void NodeMap::printJson(std::ostream &os, size_t depth) const { os << "{\n"; os << indent(depth + 1) << "\"type\": \"map\",\n"; + printLogicalType(os, logicalType(), depth + 1); if (!getDoc().empty()) { os << indent(depth + 1) << R"("doc": ")" << escape(getDoc()) << "\",\n"; @@ -494,6 +590,9 @@ void NodeMap::printJson(std::ostream &os, size_t depth) const { os << indent(depth + 1) << "\"values\": "; leafAttributes_.get(1)->printJson(os, depth + 1); os << '\n'; + for (size_t i = 0; i != customAttributes_.size(); i++) { + printCustomAttributes(customAttributes_.get(i), depth + 1, os); + } os << indent(depth) << '}'; } @@ -504,9 +603,9 @@ NodeMap::NodeMap() : NodeImplMap(AVRO_MAP) { void NodeUnion::printJson(std::ostream &os, size_t depth) const { os << "[\n"; - int fields = leafAttributes_.size(); + auto fields = leafAttributes_.size(); ++depth; - for (int i = 0; i < fields; ++i) { + for (size_t i = 0; i < fields; ++i) { if (i > 0) { os << ",\n"; } @@ -533,8 +632,11 @@ void NodeFixed::printJson(std::ostream &os, size_t depth) const { logicalType().printJson(os); } - os << "\n" - << indent(--depth) << '}'; + os << "\n"; + for (size_t i = 0; i != customAttributes_.size(); i++) { + printCustomAttributes(customAttributes_.get(i), depth, os); + } + os << indent(--depth) << '}'; } } // namespace avro diff --git a/lang/c++/impl/Resolver.cc b/lang/c++/impl/Resolver.cc index 919345e8a2d..b7a57197bec 100644 --- a/lang/c++/impl/Resolver.cc +++ b/lang/c++/impl/Resolver.cc @@ -51,7 +51,7 @@ class PrimitiveSkipper : public Resolver { public: PrimitiveSkipper() : Resolver() {} - void parse(Reader &reader, uint8_t *address) const final { + void parse(Reader &reader, uint8_t *) const final { T val; reader.readValue(val); DEBUG_OUT("Skipping " << val); @@ -93,7 +93,7 @@ class PrimitivePromoter : public Resolver { DEBUG_OUT("Promoting " << val); } - void parseIt(Reader &reader, uint8_t *, const std::false_type &) const {} + void parseIt(Reader &, uint8_t *, const std::false_type &) const {} template void parseIt(Reader &reader, uint8_t *address) const { @@ -108,7 +108,7 @@ class PrimitiveSkipper> : public Resolver { public: PrimitiveSkipper() : Resolver() {} - void parse(Reader &reader, uint8_t *address) const final { + void parse(Reader &reader, uint8_t *) const final { std::vector val; reader.readBytes(val); DEBUG_OUT("Skipping bytes"); @@ -276,9 +276,9 @@ class ArrayParser : public Resolver { class EnumSkipper : public Resolver { public: - EnumSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {} + EnumSkipper(ResolverFactory &, const NodePtr &) : Resolver() {} - void parse(Reader &reader, uint8_t *address) const final { + void parse(Reader &reader, uint8_t *) const final { int64_t val = reader.readEnum(); DEBUG_OUT("Skipping enum" << val); } @@ -290,9 +290,9 @@ class EnumParser : public Resolver { VAL }; - EnumParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver(), - offset_(offsets.at(0).offset()), - readerSize_(reader->names()) { + EnumParser(ResolverFactory &, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver(), + offset_(offsets.at(0).offset()), + readerSize_(reader->names()) { const size_t writerSize = writer->names(); mapping_.reserve(writerSize); @@ -307,7 +307,7 @@ class EnumParser : public Resolver { void parse(Reader &reader, uint8_t *address) const final { auto val = static_cast(reader.readEnum()); - assert(static_cast(val) < mapping_.size()); + assert(val < mapping_.size()); if (mapping_[val] < readerSize_) { auto *location = reinterpret_cast(address + offset_); @@ -349,7 +349,7 @@ class UnionParser : public Resolver { *readerChoice = choiceMapping_[writerChoice]; auto *setter = reinterpret_cast(address + setFuncOffset_); - auto *value = reinterpret_cast(address + offset_); + uint8_t *value = address + offset_; uint8_t *location = (*setter)(value, *readerChoice); resolvers_[writerChoice]->parse(reader, location); @@ -397,7 +397,7 @@ class NonUnionToUnionParser : public Resolver { auto *choice = reinterpret_cast(address + choiceOffset_); *choice = choice_; auto *setter = reinterpret_cast(address + setFuncOffset_); - auto *value = reinterpret_cast(address + offset_); + uint8_t *value = address + offset_; uint8_t *location = (*setter)(value, choice_); resolver_->parse(reader, location); @@ -413,43 +413,43 @@ class NonUnionToUnionParser : public Resolver { class FixedSkipper : public Resolver { public: - FixedSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() { + FixedSkipper(ResolverFactory &, const NodePtr &writer) : Resolver() { size_ = writer->fixedSize(); } - void parse(Reader &reader, uint8_t *address) const final { + void parse(Reader &reader, uint8_t *) const final { DEBUG_OUT("Skipping fixed"); std::unique_ptr val(new uint8_t[size_]); reader.readFixed(&val[0], size_); } protected: - int size_; + size_t size_; }; class FixedParser : public Resolver { public: - FixedParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver() { + FixedParser(ResolverFactory &, const NodePtr &writer, const NodePtr &, const CompoundLayout &offsets) : Resolver() { size_ = writer->fixedSize(); offset_ = offsets.at(0).offset(); } void parse(Reader &reader, uint8_t *address) const final { DEBUG_OUT("Reading fixed"); - auto *location = reinterpret_cast(address + offset_); + uint8_t *location = address + offset_; reader.readFixed(location, size_); } protected: - int size_; + size_t size_; size_t offset_; }; -class ResolverFactory : private boost::noncopyable { +class ResolverFactory { template unique_ptr - constructPrimitiveSkipper(const NodePtr &writer) { + constructPrimitiveSkipper(const NodePtr &) { return unique_ptr(new PrimitiveSkipper()); } @@ -512,6 +512,10 @@ class ResolverFactory : private boost::noncopyable { } public: + ResolverFactory() = default; + ResolverFactory(const ResolverFactory &) = delete; + ResolverFactory &operator=(const ResolverFactory &) = delete; + unique_ptr construct(const NodePtr &writer, const NodePtr &reader, const Layout &offset) { @@ -710,8 +714,8 @@ NonUnionToUnionParser::NonUnionToUnionParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver(), - offset_(offsets.offset()), choice_(0), + offset_(offsets.offset()), choiceOffset_(offsets.at(0).offset()), setFuncOffset_(offsets.at(1).offset()) { #ifndef NDEBUG diff --git a/lang/c++/impl/Schema.cc b/lang/c++/impl/Schema.cc index 42245292e67..8f42b850a09 100644 --- a/lang/c++/impl/Schema.cc +++ b/lang/c++/impl/Schema.cc @@ -18,6 +18,7 @@ #include +#include "CustomAttributes.hh" #include "Schema.hh" namespace avro { @@ -27,11 +28,18 @@ RecordSchema::RecordSchema(const std::string &name) : Schema(new NodeRecord) { } void RecordSchema::addField(const std::string &name, const Schema &fieldSchema) { + const CustomAttributes emptyCustomAttribute; + addField(name, fieldSchema, emptyCustomAttribute); +} + +void RecordSchema::addField(const std::string &name, const Schema &fieldSchema, const CustomAttributes &customFields) { // add the name first. it will throw if the name is a duplicate, preventing // the leaf from being added node_->addName(name); node_->addLeaf(fieldSchema.root()); + + node_->addCustomAttributesForField(customFields); } std::string RecordSchema::getDoc() const { diff --git a/lang/c++/impl/Stream.cc b/lang/c++/impl/Stream.cc index 63a8b4e8fc5..1ca5c346466 100644 --- a/lang/c++/impl/Stream.cc +++ b/lang/c++/impl/Stream.cc @@ -17,6 +17,10 @@ */ #include "Stream.hh" + +#include +#include +#include #include namespace avro { @@ -117,7 +121,7 @@ class MemoryInputStream2 : public InputStream { } }; -class MemoryOutputStream : public OutputStream { +class MemoryOutputStream final : public OutputStream { public: const size_t chunkSize_; std::vector data_; @@ -129,7 +133,7 @@ class MemoryOutputStream : public OutputStream { ~MemoryOutputStream() final { for (std::vector::const_iterator it = data_.begin(); it != data_.end(); ++it) { - delete[] * it; + delete[] *it; } } diff --git a/lang/c++/impl/ValidSchema.cc b/lang/c++/impl/ValidSchema.cc index 63a3bbee919..d99d7e24198 100644 --- a/lang/c++/impl/ValidSchema.cc +++ b/lang/c++/impl/ValidSchema.cc @@ -16,7 +16,6 @@ * limitations under the License. */ -#include #include #include #include @@ -25,7 +24,6 @@ #include "Schema.hh" #include "ValidSchema.hh" -using boost::format; using std::make_pair; using std::ostringstream; using std::shared_ptr; @@ -37,8 +35,7 @@ using SymbolMap = std::map; static bool validate(const NodePtr &node, SymbolMap &symbolMap) { if (!node->isValid()) { - throw Exception(format("Schema is invalid, due to bad node of type %1%") - % node->type()); + throw Exception("Schema is invalid, due to bad node of type {}", node->type()); } if (node->hasName()) { @@ -51,7 +48,7 @@ static bool validate(const NodePtr &node, SymbolMap &symbolMap) { if (node->type() == AVRO_SYMBOLIC) { if (!found) { - throw Exception(format("Symbolic name \"%1%\" is unknown") % node->name()); + throw Exception("Symbolic name \"{}\" is unknown", node->name()); } shared_ptr symNode = @@ -69,8 +66,8 @@ static bool validate(const NodePtr &node, SymbolMap &symbolMap) { } node->lock(); - auto leaves = node->leaves(); - for (auto i = 0; i < leaves; ++i) { + size_t leaves = node->leaves(); + for (size_t i = 0; i < leaves; ++i) { const NodePtr &leaf(node->leafAt(i)); if (!validate(leaf, symbolMap)) { diff --git a/lang/c++/impl/Validator.cc b/lang/c++/impl/Validator.cc index 0e5fd8bedad..c00460480b1 100644 --- a/lang/c++/impl/Validator.cc +++ b/lang/c++/impl/Validator.cc @@ -62,7 +62,7 @@ bool Validator::countingSetup() { compoundStack_.pop_back(); proceed = false; } else { - counters_.push_back(static_cast(count_)); + counters_.push_back(count_); } } @@ -71,14 +71,14 @@ bool Validator::countingSetup() { void Validator::countingAdvance() { if (countingSetup()) { - auto index = (compoundStack_.back().pos)++; + size_t index = (compoundStack_.back().pos)++; const NodePtr &node = compoundStack_.back().node; if (index < node->leaves()) { setupOperation(node->leafAt(index)); } else { compoundStack_.back().pos = 0; - int count = --counters_.back(); + size_t count = --counters_.back(); if (count == 0) { counters_.pop_back(); compoundStarted_ = true; @@ -100,14 +100,13 @@ void Validator::unionAdvance() { waitingForCount_ = false; NodePtr node = compoundStack_.back().node; - if (count_ < static_cast(node->leaves())) { + if (count_ < node->leaves()) { compoundStack_.pop_back(); setupOperation(node->leafAt(static_cast(count_))); } else { throw Exception( - boost::format("Union selection out of range, got %1%," - " expecting 0-%2%") - % count_ % (node->leaves() - 1)); + "Union selection out of range, got {}, expecting 0-{}", + count_, node->leaves() - 1); } } } @@ -117,7 +116,7 @@ void Validator::fixedAdvance() { compoundStack_.pop_back(); } -int Validator::nextSizeExpected() const { +size_t Validator::nextSizeExpected() const { return compoundStack_.back().node->fixedSize(); } @@ -169,11 +168,9 @@ void Validator::advance() { } } -void Validator::setCount(int64_t count) { +void Validator::setCount(size_t count) { if (!waitingForCount_) { throw Exception("Not expecting count"); - } else if (count_ < 0) { - throw Exception("Count cannot be negative"); } count_ = count; diff --git a/lang/c++/impl/Zigzag.cc b/lang/c++/impl/Zigzag.cc index 538a89cbaa7..7875f789bd2 100644 --- a/lang/c++/impl/Zigzag.cc +++ b/lang/c++/impl/Zigzag.cc @@ -30,11 +30,11 @@ encodeInt64(int64_t input, std::array &output) noexcept { auto v = val & mask; size_t bytesOut = 0; while (val >>= 7) { - output[bytesOut++] = (v | 0x80); + output[bytesOut++] = static_cast(v | 0x80); v = val & mask; } - output[bytesOut++] = v; + output[bytesOut++] = static_cast(v); return bytesOut; } size_t @@ -46,11 +46,11 @@ encodeInt32(int32_t input, std::array &output) noexcept { auto v = val & mask; size_t bytesOut = 0; while (val >>= 7) { - output[bytesOut++] = (v | 0x80); + output[bytesOut++] = static_cast(v | 0x80); v = val & mask; } - output[bytesOut++] = v; + output[bytesOut++] = static_cast(v); return bytesOut; } diff --git a/lang/c++/impl/ZstdCompressWrapper.cc b/lang/c++/impl/ZstdCompressWrapper.cc new file mode 100644 index 00000000000..fecf335ef6f --- /dev/null +++ b/lang/c++/impl/ZstdCompressWrapper.cc @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef ZSTD_CODEC_AVAILABLE + +#include "ZstdCompressWrapper.hh" +#include "Exception.hh" + +#include + +namespace avro { + +std::vector ZstdCompressWrapper::compress(const std::vector &uncompressed, std::optional compressionLevel) { + // Pre-allocate buffer for compressed data + size_t max_compressed_size = ZSTD_compressBound(uncompressed.size()); + if (ZSTD_isError(max_compressed_size)) { + throw Exception("ZSTD compression error: {}", ZSTD_getErrorName(max_compressed_size)); + } + std::vector compressed(max_compressed_size); + + // Compress the data using ZSTD block API + size_t compressed_size = ZSTD_compress( + compressed.data(), max_compressed_size, + uncompressed.data(), uncompressed.size(), + compressionLevel.value_or(ZSTD_CLEVEL_DEFAULT)); + + if (ZSTD_isError(compressed_size)) { + throw Exception("ZSTD compression error: {}", ZSTD_getErrorName(compressed_size)); + } + compressed.resize(compressed_size); + return compressed; +} + +ZstdCompressWrapper::ZstdCompressWrapper() { + cctx_ = ZSTD_createCCtx(); + if (!cctx_) { + throw Exception("ZSTD_createCCtx() failed"); + } +} + +ZstdCompressWrapper::~ZstdCompressWrapper() { + ZSTD_freeCCtx(cctx_); +} + +} // namespace avro + +#endif // ZSTD_CODEC_AVAILABLE diff --git a/lang/csharp/src/apache/codegen/Properties/AssemblyInfo.cs b/lang/c++/impl/ZstdCompressWrapper.hh similarity index 59% rename from lang/csharp/src/apache/codegen/Properties/AssemblyInfo.cs rename to lang/c++/impl/ZstdCompressWrapper.hh index 6175167803a..419fb261770 100644 --- a/lang/csharp/src/apache/codegen/Properties/AssemblyInfo.cs +++ b/lang/c++/impl/ZstdCompressWrapper.hh @@ -1,4 +1,4 @@ -īģŋ/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -16,18 +16,31 @@ * limitations under the License. */ -using System.Reflection; -using System.Runtime.InteropServices; - -[assembly: AssemblyTitle("Avro.codegen")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("Apache")] -[assembly: AssemblyProduct("Avro.codegen")] -[assembly: AssemblyCopyright("Copyright Š Apache 2013")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] -[assembly: ComVisible(false)] -[assembly: Guid("3C23DD33-DD4F-42B1-B71F-8F9C86929E58")] -[assembly: AssemblyVersion("0.9.0.0")] -[assembly: AssemblyFileVersion("0.9.0.0")] \ No newline at end of file +#ifndef avro_ZstdCompressWrapper_hh__ +#define avro_ZstdCompressWrapper_hh__ + +#ifdef ZSTD_CODEC_AVAILABLE + +#include +#include + +#include + +namespace avro { + +class ZstdCompressWrapper { +public: + ZstdCompressWrapper(); + ~ZstdCompressWrapper(); + + std::vector compress(const std::vector &uncompressed, std::optional compressionLevel = std::nullopt); + +private: + ZSTD_CCtx *cctx_ = nullptr; +}; + +} // namespace avro + +#endif // ZSTD_CODEC_AVAILABLE + +#endif // avro_ZstdCompressWrapper_hh__ diff --git a/lang/c++/impl/ZstdDecompressWrapper.cc b/lang/c++/impl/ZstdDecompressWrapper.cc new file mode 100644 index 00000000000..86d996dd315 --- /dev/null +++ b/lang/c++/impl/ZstdDecompressWrapper.cc @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef ZSTD_CODEC_AVAILABLE + +#include "ZstdDecompressWrapper.hh" +#include "Exception.hh" + +#include + +namespace avro { + +std::string ZstdDecompressWrapper::decompress(const std::vector &compressed) { + std::string uncompressed; + // Get the decompressed size + size_t decompressed_size = ZSTD_getFrameContentSize(compressed.data(), compressed.size()); + if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) { + throw Exception("ZSTD: Not a valid compressed frame"); + } else if (decompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) { + // Stream decompress the data + ZSTD_inBuffer in{compressed.data(), compressed.size(), 0}; + std::vector tmp(ZSTD_DStreamOutSize()); + ZSTD_outBuffer out{tmp.data(), tmp.size(), 0}; + size_t ret; + do { + out.pos = 0; + ret = ZSTD_decompressStream(dctx_, &out, &in); + if (ZSTD_isError(ret)) { + throw Exception("ZSTD decompression error: {}", ZSTD_getErrorName(ret)); + } + uncompressed.append(tmp.data(), out.pos); + } while (ret != 0); + } else { + // Batch decompress the data + uncompressed.resize(decompressed_size); + size_t result = ZSTD_decompress( + uncompressed.data(), decompressed_size, compressed.data(), compressed.size()); + + if (ZSTD_isError(result)) { + throw Exception("ZSTD decompression error: {}", ZSTD_getErrorName(result)); + } + if (result != decompressed_size) { + throw Exception("ZSTD: Decompressed size mismatch: expected {}, got {}", + decompressed_size, result); + } + } + return uncompressed; +} + +ZstdDecompressWrapper::ZstdDecompressWrapper() { + dctx_ = ZSTD_createDCtx(); + if (!dctx_) { + throw Exception("ZSTD_createDCtx() failed"); + } +} + +ZstdDecompressWrapper::~ZstdDecompressWrapper() { + ZSTD_freeDCtx(dctx_); +} + +} // namespace avro + +#endif // ZSTD_CODEC_AVAILABLE diff --git a/lang/c++/impl/ZstdDecompressWrapper.hh b/lang/c++/impl/ZstdDecompressWrapper.hh new file mode 100644 index 00000000000..b5b97758c48 --- /dev/null +++ b/lang/c++/impl/ZstdDecompressWrapper.hh @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_ZstdDecompressWrapper_hh__ +#define avro_ZstdDecompressWrapper_hh__ + +#ifdef ZSTD_CODEC_AVAILABLE + +#include +#include + +#include + +namespace avro { + +class ZstdDecompressWrapper { +public: + ZstdDecompressWrapper(); + ~ZstdDecompressWrapper(); + + std::string decompress(const std::vector &compressed); + +private: + ZSTD_DCtx *dctx_ = nullptr; +}; + +} // namespace avro + +#endif // ZSTD_CODEC_AVAILABLE + +#endif // avro_ZstdDecompressWrapper_hh__ diff --git a/lang/c++/impl/avrogencpp.cc b/lang/c++/impl/avrogencpp.cc index 0b6b35a2f23..d6e914e9657 100644 --- a/lang/c++/impl/avrogencpp.cc +++ b/lang/c++/impl/avrogencpp.cc @@ -16,24 +16,20 @@ * limitations under the License. */ +#include #include #ifndef _WIN32 #include #endif #include #include +#include #include +#include +#include #include - -#include -#include -#include - -#include #include -#include - #include "Compiler.hh" #include "NodeImpl.hh" #include "ValidSchema.hh" @@ -48,17 +44,9 @@ using std::set; using std::string; using std::vector; -using boost::lexical_cast; - using avro::compileJsonSchema; using avro::ValidSchema; -#if __cplusplus >= 201703L -#define ANY_NS "std" -#else -#define ANY_NS "boost" -#endif - struct PendingSetterGetter { string structName; string type; @@ -75,8 +63,22 @@ struct PendingConstructor { PendingConstructor(string sn, string n, bool im) : structName(std::move(sn)), memberName(std::move(n)), initMember(im) {} }; +class UnionCodeTracker { + std::string schemaFile_; + size_t unionNumber_ = 0; + std::map, std::string> unionBranchNameMapping_; + std::set generatedUnionTraits_; + +public: + explicit UnionCodeTracker(const std::string &schemaFile); + std::optional getExistingUnionName(const std::vector &unionBranches) const; + std::string generateNewUnionName(const std::vector &unionBranches); + bool unionTraitsAlreadyGenerated(const std::string &unionClassName) const; + void setTraitsGenerated(const std::string &unionClassName); +}; + class CodeGen { - size_t unionNumber_; + UnionCodeTracker unionTracker_; std::ostream &os_; bool inNamespace_; const std::string ns_; @@ -85,7 +87,7 @@ class CodeGen { const std::string includePrefix_; const bool noUnion_; const std::string guardString_; - boost::mt19937 random_; + std::mt19937 random_; vector pendingGettersAndSetters; vector pendingConstructors; @@ -98,7 +100,6 @@ class CodeGen { std::string generateEnumType(const NodePtr &n); std::string cppTypeOf(const NodePtr &n); std::string generateRecordType(const NodePtr &n); - std::string unionName(); std::string generateUnionType(const NodePtr &n); std::string generateType(const NodePtr &n); std::string generateDeclaration(const NodePtr &n); @@ -107,17 +108,21 @@ class CodeGen { void generateTraits(const NodePtr &n); void generateRecordTraits(const NodePtr &n); void generateUnionTraits(const NodePtr &n); + void generateDocComment(const NodePtr &n, const char *indent = ""); void emitCopyright(); + void emitGeneratedWarning(); public: CodeGen(std::ostream &os, std::string ns, std::string schemaFile, std::string headerFile, std::string guardString, - std::string includePrefix, bool noUnion) : unionNumber_(0), os_(os), inNamespace_(false), ns_(std::move(ns)), + std::string includePrefix, bool noUnion) : unionTracker_(schemaFile), os_(os), inNamespace_(false), ns_(std::move(ns)), schemaFile_(std::move(schemaFile)), headerFile_(std::move(headerFile)), includePrefix_(std::move(includePrefix)), noUnion_(noUnion), guardString_(std::move(guardString)), - random_(static_cast(::time(nullptr))) {} + random_(static_cast(::time(nullptr))) { + } + void generate(const ValidSchema &schema); }; @@ -125,7 +130,7 @@ static string decorate(const std::string &name) { static const char *cppReservedWords[] = { "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case", "catch", "char", "char8_t", "char16_t", "char32_t", "class", "compl", "concept", - "const", "consteval", "constexpr", "const_cast", "continue", "co_await", "co_return", + "const", "consteval", "constexpr", "constinit", "const_cast", "continue", "co_await", "co_return", "co_yield", "decltype", "default", "delete", "do", "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "false", "float", "for", "friend", "goto", "if", "import", "inline", "int", "long", "module", "mutable", "namespace", "new", "noexcept", "not", @@ -186,7 +191,7 @@ string CodeGen::cppTypeOf(const NodePtr &n) { case avro::AVRO_MAP: return "std::mapleafAt(1)) + " >"; case avro::AVRO_FIXED: - return "std::array(n->fixedSize()) + ">"; + return "std::arrayfixedSize()) + ">"; case avro::AVRO_SYMBOLIC: return cppTypeOf(resolveSymbol(n)); case avro::AVRO_UNION: @@ -244,12 +249,18 @@ string CodeGen::generateRecordType(const NodePtr &n) { return it->second; } + generateDocComment(n); os_ << "struct " << decoratedName << " {\n"; if (!noUnion_) { for (size_t i = 0; i < c; ++i) { if (n->leafAt(i)->type() == avro::AVRO_UNION) { os_ << " typedef " << types[i] << ' ' << n->nameAt(i) << "_t;\n"; + types[i] = n->nameAt(i) + "_t"; + } + if (n->leafAt(i)->type() == avro::AVRO_ARRAY && n->leafAt(i)->leafAt(0)->type() == avro::AVRO_UNION) { + os_ << " typedef " << types[i] << "::value_type" + << ' ' << n->nameAt(i) << "_item_t;\n"; } } } @@ -257,11 +268,8 @@ string CodeGen::generateRecordType(const NodePtr &n) { // the nameAt(i) does not take c++ reserved words into account // so we need to call decorate on it std::string decoratedNameAt = decorate(n->nameAt(i)); - if (!noUnion_ && n->leafAt(i)->type() == avro::AVRO_UNION) { - os_ << " " << decoratedNameAt << "_t"; - } else { - os_ << " " << types[i]; - } + generateDocComment(n->leafAt(i), " "); + os_ << " " << types[i]; os_ << ' ' << decoratedNameAt << ";\n"; } @@ -275,13 +283,7 @@ string CodeGen::generateRecordType(const NodePtr &n) { // so we need to call decorate on it std::string decoratedNameAt = decorate(n->nameAt(i)); os_ << " " << decoratedNameAt << "("; - if (!noUnion_ && n->leafAt(i)->type() == avro::AVRO_UNION) { - // the nameAt(i) does not take c++ reserved words into account - // so we need to call decorate on it - os_ << decoratedNameAt << "_t"; - } else { - os_ << types[i]; - } + os_ << types[i]; os_ << "())"; if (i != (c - 1)) { os_ << ','; @@ -305,17 +307,6 @@ void makeCanonical(string &s, bool foldCase) { } } -string CodeGen::unionName() { - string s = schemaFile_; - string::size_type n = s.find_last_of("/\\"); - if (n != string::npos) { - s = s.substr(n); - } - makeCanonical(s, false); - - return s + "_Union__" + boost::lexical_cast(unionNumber_++) + "__"; -} - static void generateGetterAndSetter(ostream &os, const string &structName, const string &type, const string &name, size_t idx) { @@ -323,12 +314,21 @@ static void generateGetterAndSetter(ostream &os, os << "inline\n"; - os << type << sn << "get_" << name << "() const {\n" + os << "const " << type << "&" << sn << "get_" << name << "() const {\n" << " if (idx_ != " << idx << ") {\n" << " throw avro::Exception(\"Invalid type for " - << "union\");\n" + << "union " << structName << "\");\n" << " }\n" - << " return " << ANY_NS << "::any_cast<" << type << " >(value_);\n" + << " return *std::any_cast<" << type << " >(&value_);\n" + << "}\n\n"; + + os << "inline\n" + << type << "&" << sn << "get_" << name << "() {\n" + << " if (idx_ != " << idx << ") {\n" + << " throw avro::Exception(\"Invalid type for " + << "union " << structName << "\");\n" + << " }\n" + << " return *std::any_cast<" << type << " >(&value_);\n" << "}\n\n"; os << "inline\n" @@ -337,6 +337,13 @@ static void generateGetterAndSetter(ostream &os, << " idx_ = " << idx << ";\n" << " value_ = v;\n" << "}\n\n"; + + os << "inline\n" + << "void" << sn << "set_" << name + << "(" << type << "&& v) {\n" + << " idx_ = " << idx << ";\n" + << " value_ = std::move(v);\n" + << "}\n\n"; } static void generateConstructor(ostream &os, @@ -380,14 +387,43 @@ string CodeGen::generateUnionType(const NodePtr &n) { return done[n]; } - auto result = unionName(); + // re-use existing union types that have the exact same branches + if (const auto existingName = unionTracker_.getExistingUnionName(types); existingName.has_value()) { + return existingName.value(); + } + const std::string result = unionTracker_.generateNewUnionName(types); os_ << "struct " << result << " {\n" << "private:\n" << " size_t idx_;\n" - << " " << ANY_NS << "::any value_;\n" - << "public:\n" - << " size_t idx() const { return idx_; }\n"; + << " std::any value_;\n" + << "public:\n"; + + os_ << " /** enum representing union branches as returned by the idx() function */\n" + << " enum class Branch: size_t {\n"; + + // generate a enum that maps the branch name to the corresponding index (as returned by idx()) + std::set used_branch_names; + for (size_t i = 0; i < c; ++i) { + // escape reserved literals for c++ + auto branch_name = decorate(names[i]); + // avoid rare collisions, e.g. someone might name their struct int_ + if (used_branch_names.find(branch_name) != used_branch_names.end()) { + size_t postfix = 2; + std::string escaped_name = branch_name + "_" + std::to_string(postfix); + while (used_branch_names.find(escaped_name) != used_branch_names.end()) { + ++postfix; + escaped_name = branch_name + "_" + std::to_string(postfix); + } + branch_name = escaped_name; + } + os_ << " " << branch_name << " = " << i << ",\n"; + used_branch_names.insert(branch_name); + } + os_ << " };\n"; + + os_ << " size_t idx() const { return idx_; }\n"; + os_ << " Branch branch() const { return static_cast(idx_); }\n"; for (size_t i = 0; i < c; ++i) { const NodePtr &nn = n->leafAt(i); @@ -397,14 +433,16 @@ string CodeGen::generateUnionType(const NodePtr &n) { << " }\n" << " void set_null() {\n" << " idx_ = " << i << ";\n" - << " value_ = " << ANY_NS << "::any();\n" + << " value_ = std::any();\n" << " }\n"; } else { const string &type = types[i]; const string &name = names[i]; - os_ << " " << type << " get_" << name << "() const;\n" - " void set_" - << name << "(const " << type << "& v);\n"; + os_ << " " + << "const " << type << "& get_" << name << "() const;\n" + << " " << type << "& get_" << name << "();\n" + << " void set_" << name << "(const " << type << "& v);\n" + << " void set_" << name << "(" << type << "&& v);\n"; pendingGettersAndSetters.emplace_back(result, type, name, i); } } @@ -551,8 +589,22 @@ void CodeGen::generateRecordTraits(const NodePtr &n) { } string fn = fullname(decorate(n->name())); - os_ << "template<> struct codec_traits<" << fn << "> {\n" - << " static void encode(Encoder& e, const " << fn << "& v) {\n"; + os_ << "template<> struct codec_traits<" << fn << "> {\n"; + + if (c == 0) { + os_ << " static void encode(Encoder&, const " << fn << "&) {}\n"; + // ResolvingDecoder::fieldOrder mutates the state of the decoder, so if that decoder is + // passed in, we need to call the method even though it will return an empty vector. + os_ << " static void decode(Decoder& d, " << fn << "&) {\n"; + os_ << " if (avro::ResolvingDecoder *rd = dynamic_cast(&d)) {\n"; + os_ << " rd->fieldOrder();\n"; + os_ << " }\n"; + os_ << " }\n"; + os_ << "};\n"; + return; + } + + os_ << " static void encode(Encoder& e, const " << fn << "& v) {\n"; for (size_t i = 0; i < c; ++i) { // the nameAt(i) does not take c++ reserved words into account @@ -596,6 +648,11 @@ void CodeGen::generateRecordTraits(const NodePtr &n) { } void CodeGen::generateUnionTraits(const NodePtr &n) { + const string name = done[n]; + const string fn = fullname(name); + if (unionTracker_.unionTraitsAlreadyGenerated(fn)) { + return; + } size_t c = n->leaves(); for (size_t i = 0; i < c; ++i) { @@ -603,9 +660,6 @@ void CodeGen::generateUnionTraits(const NodePtr &n) { generateTraits(nn); } - string name = done[n]; - string fn = fullname(name); - os_ << "template<> struct codec_traits<" << fn << "> {\n" << " static void encode(Encoder& e, " << fn << " v) {\n" << " e.encodeUnionIndex(v.idx());\n" @@ -641,7 +695,7 @@ void CodeGen::generateUnionTraits(const NodePtr &n) { os_ << " {\n" << " " << cppTypeOf(nn) << " vv;\n" << " avro::decode(d, vv);\n" - << " v.set_" << cppNameOf(nn) << "(vv);\n" + << " v.set_" << cppNameOf(nn) << "(std::move(vv));\n" << " }\n"; } os_ << " break;\n"; @@ -649,6 +703,8 @@ void CodeGen::generateUnionTraits(const NodePtr &n) { os_ << " }\n" << " }\n" << "};\n\n"; + + unionTracker_.setTraitsGenerated(fn); } void CodeGen::generateTraits(const NodePtr &n) { @@ -681,6 +737,43 @@ void CodeGen::generateTraits(const NodePtr &n) { } } +void CodeGen::generateDocComment(const NodePtr &n, const char *indent) { + if (!n->getDoc().empty()) { + std::vector lines; + { + const std::string &doc = n->getDoc(); + size_t pos = 0; + size_t found; + while ((found = doc.find('\n', pos)) != std::string::npos) { + lines.push_back(doc.substr(pos, found - pos)); + pos = found + 1; + } + if (pos < doc.size()) { + lines.push_back(doc.substr(pos)); + } + } + for (auto &line : lines) { + line.erase(std::remove(line.begin(), line.end(), '\r'), line.end()); + + if (line.empty()) { + os_ << indent << "//\n"; + } else { + // If a comment line ends with a backslash or backslash and whitespace, + // avoid generating code which will generate multi-line comment warnings + // on GCC. We can't just append whitespace here as escaped newlines ignore + // trailing whitespace. + auto lastBackslash = std::find(line.rbegin(), line.rend(), '\\'); + auto lastNonWs = std::find_if(line.rbegin(), line.rend(), [](char c) { return !std::isspace(static_cast(c)); }); + // Note: lastBackslash <= lastNonWs because the iterators are reversed, "less" is later in the string. + if (lastBackslash != line.rend() && lastBackslash <= lastNonWs) { + line.append("(backslash)"); + } + os_ << indent << "// " << line << "\n"; + } + } + } +} + void CodeGen::emitCopyright() { os_ << "/**\n" " * Licensed to the Apache Software Foundation (ASF) under one\n" @@ -702,17 +795,22 @@ void CodeGen::emitCopyright() { " * See the License for the specific language governing " "permissions and\n" " * limitations under the License.\n" - " */\n\n\n"; + " */\n\n"; +} + +void CodeGen::emitGeneratedWarning() { + os_ << "/* This code was generated by avrogencpp " << AVRO_VERSION << ". Do not edit.*/\n\n"; } string CodeGen::guard() { string h = headerFile_; makeCanonical(h, true); - return h + "_" + lexical_cast(random_()) + "__H_"; + return h + "_" + std::to_string(random_()) + "_H"; } void CodeGen::generate(const ValidSchema &schema) { emitCopyright(); + emitGeneratedWarning(); string h = guardString_.empty() ? guard() : guardString_; @@ -720,24 +818,15 @@ void CodeGen::generate(const ValidSchema &schema) { os_ << "#define " << h << "\n\n\n"; os_ << "#include \n" -#if __cplusplus >= 201703L << "#include \n" -#else - << "#include \"boost/any.hpp\"\n" -#endif + << "#include \n" << "#include \"" << includePrefix_ << "Specific.hh\"\n" << "#include \"" << includePrefix_ << "Encoder.hh\"\n" << "#include \"" << includePrefix_ << "Decoder.hh\"\n" << "\n"; - vector nsVector; if (!ns_.empty()) { - boost::algorithm::split_regex(nsVector, ns_, boost::regex("::")); - for (vector::const_iterator it = - nsVector.begin(); - it != nsVector.end(); ++it) { - os_ << "namespace " << *it << " {\n"; - } + os_ << "namespace " << ns_ << " {\n"; inNamespace_ = true; } @@ -760,17 +849,11 @@ void CodeGen::generate(const ValidSchema &schema) { if (!ns_.empty()) { inNamespace_ = false; - for (vector::const_iterator it = - nsVector.begin(); - it != nsVector.end(); ++it) { - os_ << "}\n"; - } + os_ << "}\n"; } os_ << "namespace avro {\n"; - unionNumber_ = 0; - generateTraits(root); os_ << "}\n"; @@ -779,19 +862,24 @@ void CodeGen::generate(const ValidSchema &schema) { os_.flush(); } -namespace po = boost::program_options; - static string readGuard(const string &filename) { std::ifstream ifs(filename.c_str()); string buf; string candidate; while (std::getline(ifs, buf)) { - boost::algorithm::trim(buf); + if (!buf.empty()) { + size_t start = 0, end = buf.length(); + while (start < end && std::isspace(buf[start], std::locale::classic())) start++; + while (start < end && std::isspace(buf[end - 1], std::locale::classic())) end--; + if (start > 0 || end < buf.length()) { + buf = buf.substr(start, end - start); + } + } if (candidate.empty()) { - if (boost::algorithm::starts_with(buf, "#ifndef ")) { + if (buf.compare(0, 8, "#ifndef ") == 0) { candidate = buf.substr(8); } - } else if (boost::algorithm::starts_with(buf, "#define ")) { + } else if (buf.compare(0, 8, "#define ") == 0) { if (candidate == buf.substr(8)) { break; } @@ -802,31 +890,107 @@ static string readGuard(const string &filename) { return candidate; } +struct ProgramOptions { + bool helpRequested = false; + bool versionRequested = false; + bool noUnionTypedef = false; + std::string includePrefix = "avro"; + std::string nameSpace; + std::string inputFile; + std::string outputFile; +}; + +static void printUsage() { + std::cout << "Allowed options:\n" + << " -h [ --help ] produce help message\n" + << " -V [ --version ] produce version information\n" + << " -p [ --include-prefix ] arg (=avro) prefix for include headers, - for none, default: avro\n" + << " -U [ --no-union-typedef ] do not generate typedefs for unions in records\n" + << " -n [ --namespace ] arg set namespace for generated code\n" + << " -i [ --input ] arg input file\n" + << " -o [ --output ] arg output file to generate\n"; +} + +static bool parseArgs(int argc, char **argv, ProgramOptions &opts) { + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + + if (arg == "-h" || arg == "--help") { + opts.helpRequested = true; + return true; + } + + if (arg == "-V" || arg == "--version") { + opts.versionRequested = true; + return true; + } + + if (arg == "-U" || arg == "--no-union-typedef") { + opts.noUnionTypedef = true; + continue; + } + + if (arg == "-p" || arg == "--include-prefix") { + if (i + 1 < argc) { + opts.includePrefix = argv[++i]; + continue; + } + } else if (arg == "-n" || arg == "--namespace") { + if (i + 1 < argc) { + opts.nameSpace = argv[++i]; + continue; + } + } else if (arg == "-i" || arg == "--input") { + if (i + 1 < argc) { + opts.inputFile = argv[++i]; + continue; + } + } else if (arg == "-o" || arg == "--output") { + if (i + 1 < argc) { + opts.outputFile = argv[++i]; + continue; + } + } else { + std::cerr << "Unknown option: " << arg << std::endl; + return false; + } + + std::cerr << "Missing value for option: " << arg << std::endl; + return false; + } + + return true; +} + int main(int argc, char **argv) { - const string NS("namespace"); - const string OUT_FILE("output"); - const string IN_FILE("input"); - const string INCLUDE_PREFIX("include-prefix"); - const string NO_UNION_TYPEDEF("no-union-typedef"); - - po::options_description desc("Allowed options"); - desc.add_options()("help,h", "produce help message")("include-prefix,p", po::value()->default_value("avro"), - "prefix for include headers, - for none, default: avro")("no-union-typedef,U", "do not generate typedefs for unions in records")("namespace,n", po::value(), "set namespace for generated code")("input,i", po::value(), "input file")("output,o", po::value(), "output file to generate"); - - po::variables_map vm; - po::store(po::parse_command_line(argc, argv, desc), vm); - po::notify(vm); - - if (vm.count("help") || vm.count(IN_FILE) == 0 || vm.count(OUT_FILE) == 0) { - std::cout << desc << std::endl; + ProgramOptions opts; + if (!parseArgs(argc, argv, opts)) { + printUsage(); return 1; } - string ns = vm.count(NS) > 0 ? vm[NS].as() : string(); - string outf = vm.count(OUT_FILE) > 0 ? vm[OUT_FILE].as() : string(); - string inf = vm.count(IN_FILE) > 0 ? vm[IN_FILE].as() : string(); - string incPrefix = vm[INCLUDE_PREFIX].as(); - bool noUnion = vm.count(NO_UNION_TYPEDEF) != 0; + if (opts.helpRequested) { + printUsage(); + return 0; + } + + if (opts.versionRequested) { + std::cout << AVRO_VERSION << std::endl; + return 0; + } + + if (opts.inputFile.empty() || opts.outputFile.empty()) { + std::cerr << "Input and output files are required.\n\n"; + printUsage(); + return 1; + } + + std::string ns = opts.nameSpace; + std::string outf = opts.outputFile; + std::string inf = opts.inputFile; + std::string incPrefix = opts.includePrefix; + bool noUnion = opts.noUnionTypedef; + if (incPrefix == "-") { incPrefix.clear(); } else if (*incPrefix.rbegin() != '/') { @@ -857,3 +1021,34 @@ int main(int argc, char **argv) { return 1; } } + +UnionCodeTracker::UnionCodeTracker(const std::string &schemaFile) : schemaFile_(schemaFile) { +} + +std::optional UnionCodeTracker::getExistingUnionName(const std::vector &unionBranches) const { + if (const auto it = unionBranchNameMapping_.find(unionBranches); it != unionBranchNameMapping_.end()) { + return it->second; + } + return std::nullopt; +} + +std::string UnionCodeTracker::generateNewUnionName(const std::vector &unionBranches) { + string s = schemaFile_; + string::size_type n = s.find_last_of("/\\"); + if (n != string::npos) { + s = s.substr(n); + } + makeCanonical(s, false); + + std::string result = s + "_Union__" + std::to_string(unionNumber_++) + "__"; + unionBranchNameMapping_.emplace(unionBranches, result); + return result; +} + +bool UnionCodeTracker::unionTraitsAlreadyGenerated(const std::string &unionClassName) const { + return generatedUnionTraits_.find(unionClassName) != generatedUnionTraits_.end(); +} + +void UnionCodeTracker::setTraitsGenerated(const std::string &unionClassName) { + generatedUnionTraits_.insert(unionClassName); +} diff --git a/lang/c++/impl/json/JsonDom.cc b/lang/c++/impl/json/JsonDom.cc index 5bffda2559c..504c4455473 100644 --- a/lang/c++/impl/json/JsonDom.cc +++ b/lang/c++/impl/json/JsonDom.cc @@ -25,9 +25,6 @@ #include "JsonIO.hh" #include "Stream.hh" -using boost::format; -using std::string; - namespace avro { namespace json { const char *typeToString(EntityType t) { @@ -142,19 +139,18 @@ void writeEntity(JsonGenerator &g, const Entity &n) { void Entity::ensureType(EntityType type) const { if (type_ != type) { - format msg = format("Invalid type. Expected \"%1%\" actual %2%") % typeToString(type) % typeToString(type_); - throw Exception(msg); + throw Exception("Invalid type. Expected \"{}\" actual {}", typeToString(type), typeToString(type_)); } } String Entity::stringValue() const { ensureType(EntityType::String); - return JsonParser::toStringValue(**boost::any_cast>(&value_)); + return JsonParser::toStringValue(**std::any_cast>(&value_)); } String Entity::bytesValue() const { ensureType(EntityType::String); - return JsonParser::toBytesValue(**boost::any_cast>(&value_)); + return JsonParser::toBytesValue(**std::any_cast>(&value_)); } std::string Entity::toString() const { @@ -181,5 +177,22 @@ std::string Entity::toString() const { return result; } +std::string Entity::toLiteralString() const { + switch (type_) { + case EntityType::Null: + return "null"; + case EntityType::Bool: + return boolValue() ? "true" : "false"; + case EntityType::Long: + return std::to_string(longValue()); + case EntityType::Double: + return std::to_string(doubleValue()); + case EntityType::String: + return stringValue(); + default: + return toString(); + } +} + } // namespace json } // namespace avro diff --git a/lang/c++/impl/json/JsonDom.hh b/lang/c++/impl/json/JsonDom.hh index 3fb5670b70b..b2be02b30cd 100644 --- a/lang/c++/impl/json/JsonDom.hh +++ b/lang/c++/impl/json/JsonDom.hh @@ -19,6 +19,7 @@ #ifndef avro_json_JsonDom_hh__ #define avro_json_JsonDom_hh__ +#include #include #include #include @@ -27,11 +28,10 @@ #include #include "Config.hh" -#include "boost/any.hpp" namespace avro { -class AVRO_DECL InputStream; +class InputStream; namespace json { class Entity; @@ -59,7 +59,7 @@ enum class EntityType { Obj }; -const char *typeToString(EntityType t); +AVRO_DECL const char *typeToString(EntityType t); inline std::ostream &operator<<(std::ostream &os, EntityType et) { return os << typeToString(et); @@ -67,7 +67,7 @@ inline std::ostream &operator<<(std::ostream &os, EntityType et) { class AVRO_DECL Entity { EntityType type_; - boost::any value_; + std::any value_; size_t line_; // can't be const else noncopyable... void ensureType(EntityType) const; @@ -76,22 +76,22 @@ public: explicit Entity(size_t line = 0) : type_(EntityType::Null), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(Bool v, size_t line = 0) : type_(EntityType::Bool), value_(v), line_(line) {} + explicit Entity(Bool v, size_t line = 0) : type_(EntityType::Bool), value_(v), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(Long v, size_t line = 0) : type_(EntityType::Long), value_(v), line_(line) {} + explicit Entity(Long v, size_t line = 0) : type_(EntityType::Long), value_(v), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(Double v, size_t line = 0) : type_(EntityType::Double), value_(v), line_(line) {} + explicit Entity(Double v, size_t line = 0) : type_(EntityType::Double), value_(v), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::String), value_(v), line_(line) {} + explicit Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::String), value_(v), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::Arr), value_(v), line_(line) {} + explicit Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::Arr), value_(v), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::Obj), value_(v), line_(line) {} + explicit Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::Obj), value_(v), line_(line) {} EntityType type() const { return type_; } @@ -99,17 +99,17 @@ public: Bool boolValue() const { ensureType(EntityType::Bool); - return boost::any_cast(value_); + return std::any_cast(value_); } Long longValue() const { ensureType(EntityType::Long); - return boost::any_cast(value_); + return std::any_cast(value_); } Double doubleValue() const { ensureType(EntityType::Double); - return boost::any_cast(value_); + return std::any_cast(value_); } String stringValue() const; @@ -118,15 +118,16 @@ public: const Array &arrayValue() const { ensureType(EntityType::Arr); - return **boost::any_cast>(&value_); + return **std::any_cast>(&value_); } const Object &objectValue() const { ensureType(EntityType::Obj); - return **boost::any_cast>(&value_); + return **std::any_cast>(&value_); } std::string toString() const; + std::string toLiteralString() const; }; template diff --git a/lang/c++/impl/json/JsonIO.cc b/lang/c++/impl/json/JsonIO.cc index 62549484a92..5f5fe8e3bc7 100644 --- a/lang/c++/impl/json/JsonIO.cc +++ b/lang/c++/impl/json/JsonIO.cc @@ -55,7 +55,8 @@ void JsonParser::expectToken(Token tk) { if (cur() == Token::String && (sv == "Infinity" || sv == "-Infinity" || sv == "NaN")) { curToken = Token::Double; - dv = sv == "Infinity" ? std::numeric_limits::infinity() : sv == "-Infinity" ? -std::numeric_limits::infinity() : std::numeric_limits::quiet_NaN(); + dv = sv == "Infinity" ? std::numeric_limits::infinity() : sv == "-Infinity" ? -std::numeric_limits::infinity() + : std::numeric_limits::quiet_NaN(); return; } else if (cur() == Token::Long) { dv = double(lv); @@ -146,7 +147,8 @@ JsonParser::Token JsonParser::tryNumber(char ch) { sv.push_back(ch); hasNext = false; - int state = (ch == '-') ? 0 : (ch == '0') ? 1 : 2; + int state = (ch == '-') ? 0 : (ch == '0') ? 1 + : 2; for (;;) { switch (state) { case 0: @@ -291,13 +293,11 @@ JsonParser::Token JsonParser::tryString() { break; case 'u': case 'U': { - uint32_t n = 0; char e[4]; in_.readBytes(reinterpret_cast(e), 4); sv.push_back('\\'); sv.push_back(ch); for (char c : e) { - n *= 16; if (isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { sv.push_back(c); } else { @@ -314,12 +314,40 @@ JsonParser::Token JsonParser::tryString() { } } +// Decode the given string and return contents as UTF8-encoded bytes. +// The input does not have the enclosing double-quotes. string JsonParser::decodeString(const string &s, bool binary) { string result; - for (string::const_iterator it = s.begin(); it != s.end(); ++it) { - char ch = *it; + auto it = s.cbegin(); + const auto end = s.cend(); + const auto readNextByte = [&]() -> char { + if (it == end) { + throw Exception("Unexpected EOF"); + } + return *it++; + }; + const auto unicodeParse = [&]() { + uint32_t n = 0; + for (int i = 0; i < 4; i++) { + auto c = readNextByte(); + n *= 16; + if (isdigit(c)) { + n += c - '0'; + } else if (c >= 'a' && c <= 'f') { + n += c - 'a' + 10; + } else if (c >= 'A' && c <= 'F') { + n += c - 'A' + 10; + } else { + throw Exception("Invalid hex character: {}", c); + } + } + return n; + }; + while (it != end) { + string::const_iterator startSeq = it; + char ch = readNextByte(); if (ch == '\\') { - ch = *++it; + ch = readNextByte(); switch (ch) { case '"': case '\\': @@ -343,48 +371,48 @@ string JsonParser::decodeString(const string &s, bool binary) { continue; case 'u': case 'U': { - uint32_t n = 0; - char e[4]; - for (char &i : e) { - n *= 16; - char c = *++it; - i = c; - if (isdigit(c)) { - n += c - '0'; - } else if (c >= 'a' && c <= 'f') { - n += c - 'a' + 10; - } else if (c >= 'A' && c <= 'F') { - n += c - 'A' + 10; - } - } + uint32_t n = unicodeParse(); if (binary) { if (n > 0xff) { - throw Exception(boost::format( - "Invalid byte for binary: %1%%2%") - % ch % string(e, 4)); + throw Exception("Invalid byte for binary: {}{}", ch, string(startSeq, ++it)); } else { - result.push_back(n); + result.push_back(static_cast(n)); continue; } } + if (n >= 0xd800 && n < 0xdc00) { + ch = readNextByte(); + if (ch != '\\') { + throw Exception("Invalid unicode sequence: {}", string(startSeq, it)); + } + ch = readNextByte(); + if (ch != 'u' && ch != 'U') { + throw Exception("Invalid unicode sequence: {}", string(startSeq, it)); + } + uint32_t m = unicodeParse(); + if (m < 0xdc00 || m > 0xdfff) { + throw Exception("Invalid unicode sequence: {}", string(startSeq, it)); + } + n = 0x10000 + (((n - 0xd800) << 10) | (m - 0xdc00)); + } else if (n >= 0xdc00 && n < 0xdfff) { + throw Exception("Invalid unicode sequence: {}", string(startSeq, it)); + } if (n < 0x80) { - result.push_back(n); + result.push_back(static_cast(n)); } else if (n < 0x800) { - result.push_back((n >> 6) | 0xc0); - result.push_back((n & 0x3f) | 0x80); + result.push_back(static_cast((n >> 6) | 0xc0)); + result.push_back(static_cast((n & 0x3f) | 0x80)); } else if (n < 0x10000) { - result.push_back((n >> 12) | 0xe0); - result.push_back(((n >> 6) & 0x3f) | 0x80); - result.push_back((n & 0x3f) | 0x80); - } else if (n < 110000) { - result.push_back((n >> 18) | 0xf0); - result.push_back(((n >> 12) & 0x3f) | 0x80); - result.push_back(((n >> 6) & 0x3f) | 0x80); - result.push_back((n & 0x3f) | 0x80); + result.push_back(static_cast((n >> 12) | 0xe0)); + result.push_back(static_cast(((n >> 6) & 0x3f) | 0x80)); + result.push_back(static_cast((n & 0x3f) | 0x80)); + } else if (n < 0x110000) { + result.push_back(static_cast((n >> 18) | 0xf0)); + result.push_back(static_cast(((n >> 12) & 0x3f) | 0x80)); + result.push_back(static_cast(((n >> 6) & 0x3f) | 0x80)); + result.push_back(static_cast((n & 0x3f) | 0x80)); } else { - throw Exception(boost::format( - "Invalid unicode value: %1%i%2%") - % ch % string(e, 4)); + throw Exception("Invalid unicode value: {}{}", n, string(startSeq, ++it)); } } continue; diff --git a/lang/c++/impl/json/JsonIO.hh b/lang/c++/impl/json/JsonIO.hh index 94889e5d010..e0c8eeff340 100644 --- a/lang/c++/impl/json/JsonIO.hh +++ b/lang/c++/impl/json/JsonIO.hh @@ -19,13 +19,13 @@ #ifndef avro_json_JsonIO_hh__ #define avro_json_JsonIO_hh__ -#include -#include -#include +#include +#include #include #include #include #include +#include #include "Config.hh" #include "Stream.hh" @@ -34,10 +34,10 @@ namespace avro { namespace json { inline char toHex(unsigned int n) { - return (n < 10) ? (n + '0') : (n + 'a' - 10); + return static_cast((n < 10) ? (n + '0') : (n + 'a' - 10)); } -class AVRO_DECL JsonParser : boost::noncopyable { +class AVRO_DECL JsonParser { public: enum class Token { Null, @@ -89,6 +89,9 @@ public: JsonParser() : curState(stValue), hasNext(false), nextChar(0), peeked(false), curToken(Token::Null), bv(false), lv(0), dv(0), line_(1) {} + JsonParser(const JsonParser &) = delete; + JsonParser &operator=(const JsonParser &) = delete; + void init(InputStream &is) { // Clear by swapping with an empty stack std::stack().swap(stateStack); @@ -263,11 +266,22 @@ class AVRO_DECL JsonGenerator { out_.write(toHex((static_cast(c)) % 16)); } - void escapeUnicode(uint32_t c) { + void escapeUnicode16(uint32_t c) { out_.write('\\'); out_.write('u'); - writeHex((c >> 8) & 0xff); - writeHex(c & 0xff); + writeHex(static_cast((c >> 8) & 0xff)); + writeHex(static_cast(c & 0xff)); + } + void escapeUnicode(uint32_t c) { + if (c < 0x10000) { + escapeUnicode16(c); + } else if (c < 0x110000) { + c -= 0x10000; + escapeUnicode16(((c >> 10) & 0x3ff) | 0xd800); + escapeUnicode16((c & 0x3ff) | 0xdc00); + } else { + throw Exception("Invalid code-point: {}", c); + } } void doEncodeString(const char *b, size_t len, bool binary) { const char *e = b + len; @@ -310,7 +324,6 @@ class AVRO_DECL JsonGenerator { switch (*p) { case '\\': case '"': - case '/': escape(*p, b, p); break; case '\b': @@ -391,23 +404,30 @@ public: } template - void encodeNumber(T t) { + std::enable_if_t, void> encodeNumber(T t) { sep(); std::ostringstream oss; - oss << boost::lexical_cast(t); + oss.imbue(std::locale::classic()); + oss << t; const std::string s = oss.str(); out_.writeBytes(reinterpret_cast(s.data()), s.size()); sep2(); } - void encodeNumber(double t) { + template + std::enable_if_t, void> encodeNumber(T t) { sep(); std::ostringstream oss; - if (boost::math::isfinite(t)) { - oss << boost::lexical_cast(t); - } else if (boost::math::isnan(t)) { + if (std::isfinite(t)) { + oss.imbue(std::locale::classic()); + if constexpr (std::is_same_v) { + oss << std::setprecision(9) << t; + } else { + oss << std::setprecision(17) << t; + } + } else if (std::isnan(t)) { oss << "NaN"; - } else if (t == std::numeric_limits::infinity()) { + } else if (t == std::numeric_limits::infinity()) { oss << "Infinity"; } else { oss << "-Infinity"; diff --git a/lang/c++/impl/parsing/JsonCodec.cc b/lang/c++/impl/parsing/JsonCodec.cc index 4fd04816069..fc073a98993 100644 --- a/lang/c++/impl/parsing/JsonCodec.cc +++ b/lang/c++/impl/parsing/JsonCodec.cc @@ -17,7 +17,6 @@ */ #include -#include #include #include #include @@ -162,8 +161,7 @@ class JsonDecoderHandler { case Symbol::Kind::Field: expectToken(in_, JsonParser::Token::String); if (s.extra() != in_.stringValue()) { - throw Exception(boost::format("Incorrect field: expected \"%1%\" but got \"%2%\".") % - s.extra() % in_.stringValue()); + throw Exception(R"(Incorrect field: expected "{}" but got "{}".)", s.extra(), in_.stringValue()); } break; default: @@ -241,8 +239,7 @@ int32_t JsonDecoder

::decodeInt() { expect(JsonParser::Token::Long); int64_t result = in_.longValue(); if (result < INT32_MIN || result > INT32_MAX) { - throw Exception(boost::format("Value out of range for Avro int: %1%") - % result); + throw Exception("Value out of range for Avro int: {}", result); } return static_cast(result); } @@ -496,6 +493,7 @@ class JsonEncoder : public Encoder { template void JsonEncoder::init(OutputStream &os) { out_.init(os); + parser_.reset(); } template @@ -540,7 +538,7 @@ void JsonEncoder::encodeFloat(float f) { out_.encodeString("Infinity"); } else if (-f == std::numeric_limits::infinity()) { out_.encodeString("-Infinity"); - } else if (boost::math::isnan(f)) { + } else if (std::isnan(f)) { out_.encodeString("NaN"); } else { out_.encodeNumber(f); @@ -554,7 +552,7 @@ void JsonEncoder::encodeDouble(double d) { out_.encodeString("Infinity"); } else if (-d == std::numeric_limits::infinity()) { out_.encodeString("-Infinity"); - } else if (boost::math::isnan(d)) { + } else if (std::isnan(d)) { out_.encodeString("NaN"); } else { out_.encodeNumber(d); diff --git a/lang/c++/impl/parsing/ResolvingDecoder.cc b/lang/c++/impl/parsing/ResolvingDecoder.cc index d86f6e58293..1553b8a4b62 100644 --- a/lang/c++/impl/parsing/ResolvingDecoder.cc +++ b/lang/c++/impl/parsing/ResolvingDecoder.cc @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -38,17 +40,14 @@ using std::make_shared; namespace parsing { -using std::make_shared; using std::shared_ptr; using std::static_pointer_cast; -using std::find_if; -using std::istringstream; using std::make_pair; using std::map; -using std::ostringstream; using std::pair; using std::reverse; +using std::set; using std::stack; using std::string; using std::unique_ptr; @@ -67,16 +66,7 @@ class ResolvingGrammarGenerator : public ValidatingGrammarGenerator { const NodePtr &reader, map &m, map &m2); - static vector> fields(const NodePtr &n) { - vector> result; - size_t c = n->names(); - for (size_t i = 0; i < c; ++i) { - result.emplace_back(n->nameAt(i), i); - } - return result; - } - - static int bestBranch(const NodePtr &writer, const NodePtr &reader); + static std::optional bestBranch(const NodePtr &writer, const NodePtr &reader); ProductionPtr getWriterProduction(const NodePtr &n, map &m2); @@ -101,8 +91,8 @@ Symbol ResolvingGrammarGenerator::generate( return Symbol::rootSymbol(main, backup); } -int ResolvingGrammarGenerator::bestBranch(const NodePtr &writer, - const NodePtr &reader) { +std::optional ResolvingGrammarGenerator::bestBranch(const NodePtr &writer, + const NodePtr &reader) { Type t = writer->type(); const size_t c = reader->leaves(); @@ -141,7 +131,7 @@ int ResolvingGrammarGenerator::bestBranch(const NodePtr &writer, break; } } - return -1; + return std::nullopt; } static shared_ptr> getAvroBinary( @@ -154,15 +144,6 @@ static shared_ptr> getAvroBinary( return snapshot(*os); } -template -struct equalsFirst { - const T1 &v_; - explicit equalsFirst(const T1 &v) : v_(v) {} - bool operator()(const pair &p) { - return p.first == v_; - } -}; - ProductionPtr ResolvingGrammarGenerator::getWriterProduction( const NodePtr &n, map &m2) { const NodePtr &nn = (n->type() == AVRO_SYMBOLIC) ? static_cast(*n).getNode() : n; @@ -182,10 +163,18 @@ ProductionPtr ResolvingGrammarGenerator::resolveRecords( map &m2) { ProductionPtr result = make_shared(); - vector> wf = fields(writer); - vector> rf = fields(reader); + vector wf(writer->names()); + for (size_t i = 0; i < wf.size(); ++i) { + wf[i] = writer->nameAt(i); + } + + set rf; + for (size_t i = 0; i < reader->names(); ++i) { + rf.emplace(i); + } + vector fieldOrder; - fieldOrder.reserve(reader->names()); + fieldOrder.reserve(rf.size()); /* * We look for all writer fields in the reader. If found, recursively @@ -193,19 +182,15 @@ ProductionPtr ResolvingGrammarGenerator::resolveRecords( * If no matching field is found for reader, arrange to skip the writer * field. */ - for (vector>::const_iterator it = wf.begin(); - it != wf.end(); ++it) { - auto it2 = find_if(rf.begin(), rf.end(), - equalsFirst(it->first)); - if (it2 != rf.end()) { - ProductionPtr p = doGenerate2(writer->leafAt(it->second), - reader->leafAt(it2->second), m, m2); + for (size_t wi = 0; wi != wf.size(); ++wi) { + size_t ri; + if (reader->nameIndex(wf[wi], ri)) { + ProductionPtr p = doGenerate2(writer->leafAt(wi), reader->leafAt(ri), m, m2); copy(p->rbegin(), p->rend(), back_inserter(*result)); - fieldOrder.push_back(it2->second); - rf.erase(it2); + fieldOrder.push_back(ri); + rf.erase(ri); } else { - ProductionPtr p = getWriterProduction( - writer->leafAt(it->second), m2); + ProductionPtr p = getWriterProduction(writer->leafAt(wi), m2); result->push_back(Symbol::skipStart()); if (p->size() == 1) { result->push_back((*p)[0]); @@ -216,24 +201,21 @@ ProductionPtr ResolvingGrammarGenerator::resolveRecords( } /* - * Examine the reader fields left out, (i.e. those didn't have corresponding + * Examine the reader fields left out (i.e. those didn't have corresponding * writer field). */ - for (vector>::const_iterator it = rf.begin(); - it != rf.end(); ++it) { - - NodePtr s = reader->leafAt(it->second); - fieldOrder.push_back(it->second); + for (const auto ri : rf) { + NodePtr s = reader->leafAt(ri); + fieldOrder.push_back(ri); if (s->type() == AVRO_SYMBOLIC) { s = resolveSymbol(s); } shared_ptr> defaultBinary = - getAvroBinary(reader->defaultValueAt(it->second)); + getAvroBinary(reader->defaultValueAt(ri)); result->push_back(Symbol::defaultStartAction(defaultBinary)); - map>::const_iterator it2 = - m.find(NodePair(s, s)); - ProductionPtr p = (it2 == m.end()) ? doGenerate2(s, s, m, m2) : it2->second; + auto it = m.find(NodePair(s, s)); + ProductionPtr p = it == m.end() ? doGenerate2(s, s, m, m2) : it->second; copy(p->rbegin(), p->rend(), back_inserter(*result)); result->push_back(Symbol::defaultEndAction()); } @@ -289,7 +271,7 @@ ProductionPtr ResolvingGrammarGenerator::doGenerate2( case AVRO_BYTES: return make_shared(1, Symbol::bytesSymbol()); case AVRO_FIXED: - if (writer->name() == reader->name() && writer->fixedSize() == reader->fixedSize()) { + if (writer->name().equalOrAliasedBy(reader->name()) && writer->fixedSize() == reader->fixedSize()) { ProductionPtr result = make_shared(); result->push_back(Symbol::sizeCheckSymbol(reader->fixedSize())); result->push_back(Symbol::fixedSymbol()); @@ -298,7 +280,7 @@ ProductionPtr ResolvingGrammarGenerator::doGenerate2( } break; case AVRO_RECORD: - if (writer->name() == reader->name()) { + if (writer->name().equalOrAliasedBy(reader->name())) { const pair key(writer, reader); map::const_iterator kp = m.find(key); if (kp != m.end()) { @@ -312,7 +294,7 @@ ProductionPtr ResolvingGrammarGenerator::doGenerate2( break; case AVRO_ENUM: - if (writer->name() == reader->name()) { + if (writer->name().equalOrAliasedBy(reader->name())) { ProductionPtr result = make_shared(); result->push_back(Symbol::enumAdjustSymbol(writer, reader)); result->push_back(Symbol::enumSymbol()); @@ -385,16 +367,18 @@ ProductionPtr ResolvingGrammarGenerator::doGenerate2( if (writerType == AVRO_INT || writerType == AVRO_LONG || writerType == AVRO_FLOAT) { return make_shared(1, - Symbol::resolveSymbol(writerType == AVRO_INT ? Symbol::Kind::Int : writerType == AVRO_LONG ? Symbol::Kind::Long : Symbol::Kind::Float, Symbol::Kind::Double)); + Symbol::resolveSymbol(writerType == AVRO_INT ? Symbol::Kind::Int : writerType == AVRO_LONG ? Symbol::Kind::Long + : Symbol::Kind::Float, + Symbol::Kind::Double)); } break; case AVRO_UNION: { - int j = bestBranch(writer, reader); - if (j >= 0) { - ProductionPtr p = doGenerate2(writer, reader->leafAt(j), m, m2); + auto j = bestBranch(writer, reader); + if (j) { + ProductionPtr p = doGenerate2(writer, reader->leafAt(*j), m, m2); ProductionPtr result = make_shared(); - result->push_back(Symbol::unionAdjustSymbol(j, p)); + result->push_back(Symbol::unionAdjustSymbol(*j, p)); result->push_back(Symbol::unionSymbol()); return result; } @@ -530,13 +514,18 @@ int64_t ResolvingDecoderImpl

::decodeLong() { template float ResolvingDecoderImpl

::decodeFloat() { Symbol::Kind k = parser_.advance(Symbol::Kind::Float); - return k == Symbol::Kind::Int ? base_->decodeInt() : k == Symbol::Kind::Long ? base_->decodeLong() : base_->decodeFloat(); + return k == Symbol::Kind::Int ? static_cast(base_->decodeInt()) + : k == Symbol::Kind::Long ? static_cast(base_->decodeLong()) + : base_->decodeFloat(); } template double ResolvingDecoderImpl

::decodeDouble() { Symbol::Kind k = parser_.advance(Symbol::Kind::Double); - return k == Symbol::Kind::Int ? base_->decodeInt() : k == Symbol::Kind::Long ? base_->decodeLong() : k == Symbol::Kind::Float ? base_->decodeFloat() : base_->decodeDouble(); + return k == Symbol::Kind::Int ? static_cast(base_->decodeInt()) + : k == Symbol::Kind::Long ? static_cast(base_->decodeLong()) + : k == Symbol::Kind::Float ? base_->decodeFloat() + : base_->decodeDouble(); } template diff --git a/lang/c++/impl/parsing/Symbol.cc b/lang/c++/impl/parsing/Symbol.cc index b7a35517af8..fe87c5205b4 100644 --- a/lang/c++/impl/parsing/Symbol.cc +++ b/lang/c++/impl/parsing/Symbol.cc @@ -75,7 +75,7 @@ Symbol Symbol::enumAdjustSymbol(const NodePtr &writer, const NodePtr &reader) { } size_t wc = writer->names(); - vector adj; + vector adj; // enums are encoded as ints adj.reserve(wc); vector err; @@ -85,10 +85,10 @@ Symbol Symbol::enumAdjustSymbol(const NodePtr &writer, const NodePtr &reader) { vector::const_iterator it = find(rs.begin(), rs.end(), s); if (it == rs.end()) { auto pos = err.size() + 1; - adj.push_back(-pos); + adj.push_back(static_cast(-pos)); err.push_back(s); } else { - adj.push_back(it - rs.begin()); + adj.push_back(static_cast(it - rs.begin())); } } return Symbol(Kind::EnumAdjust, make_pair(adj, err)); diff --git a/lang/c++/impl/parsing/Symbol.hh b/lang/c++/impl/parsing/Symbol.hh index 21e46a85ae4..08b97240050 100644 --- a/lang/c++/impl/parsing/Symbol.hh +++ b/lang/c++/impl/parsing/Symbol.hh @@ -19,16 +19,16 @@ #ifndef avro_parsing_Symbol_hh__ #define avro_parsing_Symbol_hh__ +#include +#include #include #include #include #include +#include #include #include -#include -#include - #include "Decoder.hh" #include "Exception.hh" #include "Node.hh" @@ -38,10 +38,10 @@ namespace parsing { class Symbol; -typedef std::vector Production; -typedef std::shared_ptr ProductionPtr; -typedef boost::tuple, bool, ProductionPtr, ProductionPtr> RepeaterInfo; -typedef boost::tuple RootInfo; +using Production = std::vector; +using ProductionPtr = std::shared_ptr; +using RepeaterInfo = std::tuple, bool, ProductionPtr, ProductionPtr>; +using RootInfo = std::tuple; class Symbol { public: @@ -91,7 +91,7 @@ public: private: Kind kind_; - boost::any extra_; + std::any extra_; explicit Symbol(Kind k) : kind_(k) {} template @@ -104,17 +104,17 @@ public: template T extra() const { - return boost::any_cast(extra_); + return std::any_cast(extra_); } template T *extrap() { - return boost::any_cast(&extra_); + return std::any_cast(&extra_); } template const T *extrap() const { - return boost::any_cast(&extra_); + return std::any_cast(&extra_); } template @@ -339,8 +339,8 @@ void fixup(Symbol &s, const std::map &m, } break; case Symbol::Kind::Repeater: { const RepeaterInfo &ri = *s.extrap(); - fixup_internal(boost::tuples::get<2>(ri), m, seen); - fixup_internal(boost::tuples::get<3>(ri), m, seen); + fixup_internal(std::get<2>(ri), m, seen); + fixup_internal(std::get<3>(ri), m, seen); } break; case Symbol::Kind::Placeholder: { typename std::map>::const_iterator it = @@ -363,6 +363,10 @@ template class SimpleParser { Decoder *decoder_; Handler &handler_; + /* + * parsingStack always has root at the bottom of it. + * So it is safe to call top() on it. + */ std::stack parsingStack; static void throwMismatch(Symbol::Kind actual, Symbol::Kind expected) { @@ -414,7 +418,7 @@ public: } else { switch (s.kind()) { case Symbol::Kind::Root: - append(boost::tuples::get<0>(*s.extrap())); + append(std::get<0>(*s.extrap())); continue; case Symbol::Kind::Indirect: { ProductionPtr pp = @@ -432,7 +436,7 @@ public: continue; case Symbol::Kind::Repeater: { auto *p = s.extrap(); - std::stack &ns = boost::tuples::get<0>(*p); + std::stack &ns = std::get<0>(*p); if (ns.empty()) { throw Exception( "Empty item count stack in repeater advance"); @@ -442,7 +446,7 @@ public: "Zero item count in repeater advance"); } --ns.top(); - append(boost::tuples::get<2>(*p)); + append(std::get<2>(*p)); } continue; case Symbol::Kind::Error: @@ -522,7 +526,7 @@ public: } Symbol &t2 = parsingStack.top(); auto *p = t2.extrap(); - boost::tuples::get<0>(*p).push(n); + std::get<0>(*p).push(n); continue; } case Symbol::Kind::ArrayEnd: @@ -537,7 +541,7 @@ public: } Symbol &t2 = parsingStack.top(); auto *p2 = t2.extrap(); - boost::tuples::get<0>(*p2).push(n); + std::get<0>(*p2).push(n); continue; } case Symbol::Kind::MapEnd: @@ -559,19 +563,19 @@ public: } case Symbol::Kind::Repeater: { auto *p = t.extrap(); - std::stack &ns = boost::tuples::get<0>(*p); + std::stack &ns = std::get<0>(*p); if (ns.empty()) { throw Exception( "Empty item count stack in repeater skip"); } ssize_t &n = ns.top(); if (n == 0) { - n = boost::tuples::get<1>(*p) ? d.arrayNext() - : d.mapNext(); + n = std::get<1>(*p) ? d.arrayNext() + : d.mapNext(); } if (n != 0) { --n; - append(boost::tuples::get<3>(*p)); + append(std::get<3>(*p)); continue; } else { ns.pop(); @@ -675,7 +679,7 @@ public: Symbol &s = parsingStack.top(); assertMatch(Symbol::Kind::Repeater, s.kind()); auto *p = s.extrap(); - std::stack &nn = boost::tuples::get<0>(*p); + std::stack &nn = std::get<0>(*p); nn.push(n); } @@ -684,7 +688,7 @@ public: Symbol &s = parsingStack.top(); assertMatch(Symbol::Kind::Repeater, s.kind()); auto *p = s.extrap(); - std::stack &nn = boost::tuples::get<0>(*p); + std::stack &nn = std::get<0>(*p); if (nn.empty() || nn.top() != 0) { throw Exception("Wrong number of items"); } @@ -696,7 +700,7 @@ public: Symbol &s = parsingStack.top(); assertMatch(Symbol::Kind::Repeater, s.kind()); auto *p = s.extrap(); - std::stack &ns = boost::tuples::get<0>(*p); + std::stack &ns = std::get<0>(*p); if (ns.empty()) { throw Exception("Incorrect number of items (empty)"); } @@ -742,6 +746,14 @@ public: } else if (s.kind() == Symbol::Kind::SkipStart) { parsingStack.pop(); skip(*decoder_); + } else if (s.kind() == Symbol::Kind::Indirect) { + ProductionPtr pp = s.extra(); + parsingStack.pop(); + append(pp); + } else if (s.kind() == Symbol::Kind::Symbolic) { + ProductionPtr pp(s.extra>()); + parsingStack.pop(); + append(pp); } else { break; } @@ -756,6 +768,8 @@ public: while (parsingStack.size() > 1) { parsingStack.pop(); } + Symbol &s = parsingStack.top(); + append(std::get<0>(*s.extrap())); } }; @@ -775,8 +789,8 @@ inline std::ostream &operator<<(std::ostream &os, const Symbol &s) { case Symbol::Kind::Repeater: { const RepeaterInfo &ri = *s.extrap(); os << '(' << Symbol::toString(s.kind()) - << ' ' << *boost::tuples::get<2>(ri) - << ' ' << *boost::tuples::get<3>(ri) + << ' ' << *std::get<2>(ri) + << ' ' << *std::get<3>(ri) << ')'; } break; case Symbol::Kind::Indirect: { diff --git a/lang/c++/impl/parsing/ValidatingCodec.cc b/lang/c++/impl/parsing/ValidatingCodec.cc index cfb82225f15..9ec1f040600 100644 --- a/lang/c++/impl/parsing/ValidatingCodec.cc +++ b/lang/c++/impl/parsing/ValidatingCodec.cc @@ -19,7 +19,6 @@ #include "ValidatingCodec.hh" #include -#include #include #include #include @@ -152,7 +151,7 @@ ProductionPtr ValidatingGrammarGenerator::doGenerate(const NodePtr &n, } struct DummyHandler { - static size_t handle(const Symbol &s) { + static size_t handle(const Symbol &) { return 0; } }; @@ -502,6 +501,7 @@ void ValidatingEncoder

::setItemCount(size_t count) { template void ValidatingEncoder

::startItem() { + parser_.processImplicitActions(); if (parser_.top() != Symbol::Kind::Repeater) { throw Exception("startItem at not an item boundary"); } diff --git a/lang/c++/api/AvroParse.hh b/lang/c++/include/avro/AvroParse.hh similarity index 100% rename from lang/c++/api/AvroParse.hh rename to lang/c++/include/avro/AvroParse.hh diff --git a/lang/c++/api/AvroSerialize.hh b/lang/c++/include/avro/AvroSerialize.hh similarity index 100% rename from lang/c++/api/AvroSerialize.hh rename to lang/c++/include/avro/AvroSerialize.hh diff --git a/lang/c++/api/AvroTraits.hh b/lang/c++/include/avro/AvroTraits.hh similarity index 94% rename from lang/c++/api/AvroTraits.hh rename to lang/c++/include/avro/AvroTraits.hh index 7b5a636ec33..465470a9382 100644 --- a/lang/c++/api/AvroTraits.hh +++ b/lang/c++/include/avro/AvroTraits.hh @@ -60,10 +60,10 @@ struct is_defined { typedef char no[2]; template - static yes &test(char (*)[sizeof(U)]) { throw 0; }; + static yes &test(char (*)[sizeof(U)]) { throw 0; } template - static no &test(...) { throw 0; }; + static no &test(...) { throw 0; } static const bool value = sizeof(test(0)) == sizeof(yes); }; @@ -82,10 +82,10 @@ struct is_not_defined { typedef char no[2]; template - static yes &test(char (*)[sizeof(U)]) { throw 0; }; + static yes &test(char (*)[sizeof(U)]) { throw 0; } template - static no &test(...) { throw 0; }; + static no &test(...) { throw 0; } static const bool value = sizeof(test(0)) == sizeof(no); }; diff --git a/lang/c++/api/Compiler.hh b/lang/c++/include/avro/Compiler.hh similarity index 89% rename from lang/c++/api/Compiler.hh rename to lang/c++/include/avro/Compiler.hh index bdcbb355e28..9c9ec8a71fb 100644 --- a/lang/c++/api/Compiler.hh +++ b/lang/c++/include/avro/Compiler.hh @@ -22,17 +22,19 @@ #include "Config.hh" #include #include +#include namespace avro { -class AVRO_DECL InputStream; +class InputStream; /// This class is used to implement an avro spec parser using a flex/bison /// compiler. In order for the lexer to be reentrant, this class provides a /// lexer object for each parse. The bison parser also uses this class to /// build up an avro parse tree as the avro spec is parsed. -class AVRO_DECL ValidSchema; +class Name; +class ValidSchema; /// Given a stream containing a JSON schema, compiles the schema to a /// ValidSchema object. Throws if the schema cannot be compiled to a valid @@ -58,6 +60,9 @@ AVRO_DECL ValidSchema compileJsonSchemaFromString(const std::string &input); AVRO_DECL ValidSchema compileJsonSchemaFromFile(const char *filename); +AVRO_DECL ValidSchema compileJsonSchemaWithNamedReferences(std::istream &is, + const std::map &namedReferences); + } // namespace avro #endif diff --git a/lang/c++/include/avro/Config.hh b/lang/c++/include/avro/Config.hh new file mode 100644 index 00000000000..5f5becc0dc2 --- /dev/null +++ b/lang/c++/include/avro/Config.hh @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Config_hh +#define avro_Config_hh + +#ifdef _MSC_VER +#pragma warning(disable : 4275 4251) +#endif // _MSC_VER + +/* + * Symbol visibility macros: + * - AVRO_DLL_EXPORT annotation for exporting symbols + * - AVRO_DLL_IMPORT annotation for importing symbols + * - AVRO_DLL_HIDDEN annotation for hiding symbols + * - AVRO_DYN_LINK needs to be defined when compiling / linking avro as dynamic library + * - AVRO_SOURCE needs to be defined when compiling avro as library + * - AVRO_DECL contains the correct symbol visibility annotation depending on AVRO_DYN_LINK and AVRO_SOURCE + */ + +#if defined _WIN32 || defined __CYGWIN__ +#define AVRO_DLL_EXPORT __declspec(dllexport) +#define AVRO_DLL_IMPORT __declspec(dllimport) +#define AVRO_DLL_HIDDEN +#else +#define AVRO_DLL_EXPORT [[gnu::visibility("default")]] +#define AVRO_DLL_IMPORT [[gnu::visibility("default")]] +#define AVRO_DLL_HIDDEN [[gnu::visibility("hidden")]] +#endif // _WIN32 || __CYGWIN__ + +#ifdef AVRO_DYN_LINK +#ifdef AVRO_SOURCE +#define AVRO_DECL AVRO_DLL_EXPORT +#else +#define AVRO_DECL AVRO_DLL_IMPORT +#endif // AVRO_SOURCE +#endif // AVRO_DYN_LINK + +#ifndef AVRO_DECL +#define AVRO_DECL +#endif + +#ifdef _WIN32 +#include +using ssize_t = SSIZE_T; +#endif // _WIN32 + +#endif diff --git a/lang/c++/include/avro/CustomAttributes.hh b/lang/c++/include/avro/CustomAttributes.hh new file mode 100644 index 00000000000..72f4acaec03 --- /dev/null +++ b/lang/c++/include/avro/CustomAttributes.hh @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_CustomAttributes_hh__ +#define avro_CustomAttributes_hh__ + +#include "Config.hh" +#include +#include +#include +#include +#include + +namespace avro { + +// CustomAttributes class stores avro custom attributes. +// Each attribute is represented by a unique name and value. +// User is supposed to create CustomAttributes object and then add it to Schema. +class AVRO_DECL CustomAttributes { +public: + // Retrieves the custom attribute json entity for that attributeName, returns an + // null if the attribute doesn't exist. + std::optional getAttribute(const std::string &name) const; + + // Adds a custom attribute. If the attribute already exists, throw an exception. + // + // If `addQuotes` is true, the `value` will be wrapped in double quotes in the + // json serialization; otherwise, the `value` will be serialized as is. + void addAttribute(const std::string &name, const std::string &value, bool addQuotes = true); + + // Provides a way to iterate over the custom attributes or check attribute size. + const std::map &attributes() const { + return attributes_; + } + + // Prints the attribute value for the specific attribute. + void printJson(std::ostream &os, const std::string &name) const; + +private: + std::map attributes_; + std::unordered_set keysNeedQuotes_; +}; + +} // namespace avro + +#endif diff --git a/lang/c++/api/DataFile.hh b/lang/c++/include/avro/DataFile.hh similarity index 81% rename from lang/c++/api/DataFile.hh rename to lang/c++/include/avro/DataFile.hh index 94a1dab8e31..4a16a3bd5d3 100644 --- a/lang/c++/api/DataFile.hh +++ b/lang/c++/include/avro/DataFile.hh @@ -26,52 +26,67 @@ #include "ValidSchema.hh" #include "buffer/Buffer.hh" +#include #include +#include #include #include -#include "array" -#include "boost/utility.hpp" -#include - namespace avro { /** Specify type of compression to use when writing data files. */ enum Codec { - NULL_CODEC, - DEFLATE_CODEC, - -#ifdef SNAPPY_CODEC_AVAILABLE - SNAPPY_CODEC -#endif - + NULL_CODEC = 0, + DEFLATE_CODEC = 1, + SNAPPY_CODEC = 2, + ZSTD_CODEC = 3 }; +/** + * Returns true if the specified codec is available at runtime. + */ +AVRO_DECL bool isCodecAvailable(Codec codec); + const int SyncSize = 16; /** * The sync value. */ typedef std::array DataFileSync; +/** + * Avro files may include arbitrary user-specified metadata. + * File metadata is written as if defined by the following map schema: + * + * `{"type": "map", "values": "bytes"}` + * + * All metadata properties that start with "avro." are reserved. + * The following file metadata properties are currently used: + * + * - `avro.schema` contains the schema of objects stored in the file, as JSON data (required). + * - `avro.codec`, the name of the compression codec used to compress blocks, as a string. + * Implementations are required to support the following codecs: "null" and "deflate". + * If codec is absent, it is assumed to be "null". See avro.codecs for implementation details. + */ +typedef std::map> Metadata; + /** * Type-independent portion of DataFileWriter. * At any given point in time, at most one file can be written using * this object. */ -class AVRO_DECL DataFileWriterBase : boost::noncopyable { +class AVRO_DECL DataFileWriterBase { const std::string filename_; const ValidSchema schema_; const EncoderPtr encoderPtr_; const size_t syncInterval_; Codec codec_; + std::optional compressionLevel_; std::unique_ptr stream_; std::unique_ptr buffer_; const DataFileSync sync_; int64_t objectCount_; - typedef std::map> Metadata; - Metadata metadata_; int64_t lastSync_; @@ -118,9 +133,14 @@ public: * Constructs a data file writer with the given sync interval and name. */ DataFileWriterBase(const char *filename, const ValidSchema &schema, - size_t syncInterval, Codec codec = NULL_CODEC); + size_t syncInterval, Codec codec = NULL_CODEC, + const Metadata &metadata = {}, std::optional compressionLevel = std::nullopt); DataFileWriterBase(std::unique_ptr outputStream, - const ValidSchema &schema, size_t syncInterval, Codec codec); + const ValidSchema &schema, size_t syncInterval, Codec codec, + const Metadata &metadata = {}, std::optional compressionLevel = std::nullopt); + + DataFileWriterBase(const DataFileWriterBase &) = delete; + DataFileWriterBase &operator=(const DataFileWriterBase &) = delete; ~DataFileWriterBase(); /** @@ -144,7 +164,7 @@ public: * An Avro datafile that can store objects of type T. */ template -class DataFileWriter : boost::noncopyable { +class DataFileWriter { std::unique_ptr base_; public: @@ -152,10 +172,17 @@ public: * Constructs a new data file. */ DataFileWriter(const char *filename, const ValidSchema &schema, - size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC) : base_(new DataFileWriterBase(filename, schema, syncInterval, codec)) {} + size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC, + const Metadata &metadata = {}, std::optional compressionLevel = std::nullopt) + : base_(std::make_unique(filename, schema, syncInterval, codec, metadata, compressionLevel)) {} DataFileWriter(std::unique_ptr outputStream, const ValidSchema &schema, - size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC) : base_(new DataFileWriterBase(std::move(outputStream), schema, syncInterval, codec)) {} + size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC, + const Metadata &metadata = {}, std::optional compressionLevel = std::nullopt) + : base_(std::make_unique(std::move(outputStream), schema, syncInterval, codec, metadata, compressionLevel)) {} + + DataFileWriter(const DataFileWriter &) = delete; + DataFileWriter &operator=(const DataFileWriter &) = delete; /** * Writes the given piece of data into the file. @@ -191,9 +218,9 @@ public: /** * The type independent portion of reader. */ -class AVRO_DECL DataFileReaderBase : boost::noncopyable { +class AVRO_DECL DataFileReaderBase { const std::string filename_; - const std::unique_ptr stream_; + std::unique_ptr stream_; const DecoderPtr decoder_; int64_t objectCount_; bool eof_; @@ -205,13 +232,11 @@ class AVRO_DECL DataFileReaderBase : boost::noncopyable { ValidSchema dataSchema_; DecoderPtr dataDecoder_; std::unique_ptr dataStream_; - typedef std::map> Metadata; Metadata metadata_; DataFileSync sync_{}; // for compressed buffer - std::unique_ptr os_; std::vector compressed_; std::string uncompressed; void readHeader(); @@ -245,6 +270,9 @@ public: explicit DataFileReaderBase(std::unique_ptr inputStream); + DataFileReaderBase(const DataFileReaderBase &) = delete; + DataFileReaderBase &operator=(const DataFileReaderBase &) = delete; + /** * Initializes the reader so that the reader and writer schemas * are the same. @@ -297,13 +325,18 @@ public: * Return the last synchronization point before our current position. */ int64_t previousSync() const; + + /** + * Return the metadata for the data file. + */ + const Metadata &metadata() const { return metadata_; } }; /** * Reads the contents of data file one after another. */ template -class DataFileReader : boost::noncopyable { +class DataFileReader { std::unique_ptr base_; public: @@ -358,6 +391,9 @@ public: base_->init(readerSchema); } + DataFileReader(const DataFileReader &) = delete; + DataFileReader &operator=(const DataFileReader &) = delete; + /** * Reads the next entry from the data file. * \return true if an object has been successfully read into \p datum and @@ -409,6 +445,11 @@ public: * Return the last synchronization point before our current position. */ int64_t previousSync() { return base_->previousSync(); } + + /** + * Return the metadata for the data file. + */ + const Metadata &metadata() const { return base_->metadata(); } }; } // namespace avro diff --git a/lang/c++/api/Decoder.hh b/lang/c++/include/avro/Decoder.hh similarity index 100% rename from lang/c++/api/Decoder.hh rename to lang/c++/include/avro/Decoder.hh diff --git a/lang/c++/api/Encoder.hh b/lang/c++/include/avro/Encoder.hh similarity index 100% rename from lang/c++/api/Encoder.hh rename to lang/c++/include/avro/Encoder.hh diff --git a/lang/c++/api/Exception.hh b/lang/c++/include/avro/Exception.hh similarity index 84% rename from lang/c++/api/Exception.hh rename to lang/c++/include/avro/Exception.hh index 691869bed8c..234a1c93023 100644 --- a/lang/c++/api/Exception.hh +++ b/lang/c++/include/avro/Exception.hh @@ -20,19 +20,21 @@ #define avro_Exception_hh__ #include "Config.hh" -#include +#include #include namespace avro { /// Wrapper for std::runtime_error that provides convenience constructor -/// for boost::format objects +/// for formatted messages class AVRO_DECL Exception : public virtual std::runtime_error { public: explicit Exception(const std::string &msg) : std::runtime_error(msg) {} - explicit Exception(const boost::format &msg) : std::runtime_error(boost::str(msg)) {} + template + Exception(fmt::format_string fmt, Args &&...args) + : std::runtime_error(fmt::format(fmt, std::forward(args)...)) {} }; } // namespace avro diff --git a/lang/c++/api/Generic.hh b/lang/c++/include/avro/Generic.hh similarity index 93% rename from lang/c++/api/Generic.hh rename to lang/c++/include/avro/Generic.hh index f35e8cf6896..2eb34479998 100644 --- a/lang/c++/api/Generic.hh +++ b/lang/c++/include/avro/Generic.hh @@ -19,8 +19,6 @@ #ifndef avro_Generic_hh__ #define avro_Generic_hh__ -#include - #include "Config.hh" #include "Decoder.hh" #include "Encoder.hh" @@ -31,7 +29,7 @@ namespace avro { /** * A utility class to read generic datum from decoders. */ -class AVRO_DECL GenericReader : boost::noncopyable { +class AVRO_DECL GenericReader { const ValidSchema schema_; const bool isResolving_; const DecoderPtr decoder_; @@ -52,6 +50,9 @@ public: GenericReader(const ValidSchema &writerSchema, const ValidSchema &readerSchema, const DecoderPtr &decoder); + GenericReader(const GenericReader &) = delete; + GenericReader &operator=(const GenericReader &) = delete; + /** * Reads a value off the decoder. */ @@ -79,7 +80,7 @@ public: /** * A utility class to write generic datum to encoders. */ -class AVRO_DECL GenericWriter : boost::noncopyable { +class AVRO_DECL GenericWriter { const ValidSchema schema_; const EncoderPtr encoder_; @@ -91,6 +92,9 @@ public: */ GenericWriter(ValidSchema s, EncoderPtr encoder); + GenericWriter(const GenericWriter &) = delete; + GenericWriter &operator=(const GenericWriter &) = delete; + /** * Writes a value onto the encoder. */ diff --git a/lang/c++/api/GenericDatum.hh b/lang/c++/include/avro/GenericDatum.hh similarity index 90% rename from lang/c++/api/GenericDatum.hh rename to lang/c++/include/avro/GenericDatum.hh index f58fd949950..1b1c3d9af87 100644 --- a/lang/c++/api/GenericDatum.hh +++ b/lang/c++/include/avro/GenericDatum.hh @@ -19,17 +19,12 @@ #ifndef avro_GenericDatum_hh__ #define avro_GenericDatum_hh__ +#include #include #include #include #include -#if __cplusplus >= 201703L -#include -#else -#include "boost/any.hpp" -#endif - #include "LogicalType.hh" #include "Node.hh" #include "ValidSchema.hh" @@ -48,7 +43,7 @@ namespace avro { * \li Avro float maps to C++ float. * \li Avro double maps to C++ double. * \li Avro string maps to C++ std::string. - * \li Avro bytes maps to C++ std::vector<uint_t>. + * \li Avro bytes maps to C++ std::vector<uint8_t>. * \li Avro fixed maps to C++ class GenericFixed. * \li Avro enum maps to C++ class GenericEnum. * \li Avro array maps to C++ class GenericArray. @@ -62,11 +57,7 @@ class AVRO_DECL GenericDatum { protected: Type type_; LogicalType logicalType_; -#if __cplusplus >= 201703L std::any value_; -#else - boost::any value_; -#endif explicit GenericDatum(Type t) : type_(t), logicalType_(LogicalType::NONE) {} @@ -192,11 +183,7 @@ public: template GenericDatum(const NodePtr &schema, const T &v) : type_(schema->type()), logicalType_(schema->logicalType()) { init(schema); -#if __cplusplus >= 201703L *std::any_cast(&value_) = v; -#else - *boost::any_cast(&value_) = v; -#endif } /** @@ -539,65 +526,33 @@ public: }; inline Type GenericDatum::type() const { - return (type_ == AVRO_UNION) ? -#if __cplusplus >= 201703L - std::any_cast(&value_)->datum().type() - : -#else - boost::any_cast(&value_)->datum().type() - : -#endif - type_; + return (type_ == AVRO_UNION) ? std::any_cast(&value_)->datum().type() + : type_; } inline LogicalType GenericDatum::logicalType() const { - return (type_ == AVRO_UNION) ? -#if __cplusplus >= 201703L - std::any_cast(&value_)->datum().logicalType() : -#else - boost::any_cast(&value_)->datum().logicalType() : -#endif - logicalType_; + return (type_ == AVRO_UNION) ? std::any_cast(&value_)->datum().logicalType() + : logicalType_; } template T &GenericDatum::value() { - return (type_ == AVRO_UNION) ? -#if __cplusplus >= 201703L - std::any_cast(&value_)->datum().value() + return (type_ == AVRO_UNION) ? std::any_cast(&value_)->datum().value() : *std::any_cast(&value_); -#else - boost::any_cast(&value_)->datum().value() - : *boost::any_cast(&value_); -#endif } template const T &GenericDatum::value() const { - return (type_ == AVRO_UNION) ? -#if __cplusplus >= 201703L - std::any_cast(&value_)->datum().value() + return (type_ == AVRO_UNION) ? std::any_cast(&value_)->datum().value() : *std::any_cast(&value_); -#else - boost::any_cast(&value_)->datum().value() - : *boost::any_cast(&value_); -#endif } inline size_t GenericDatum::unionBranch() const { -#if __cplusplus >= 201703L return std::any_cast(&value_)->currentBranch(); -#else - return boost::any_cast(&value_)->currentBranch(); -#endif } inline void GenericDatum::selectBranch(size_t branch) { -#if __cplusplus >= 201703L std::any_cast(&value_)->selectBranch(branch); -#else - boost::any_cast(&value_)->selectBranch(branch); -#endif } } // namespace avro diff --git a/lang/c++/api/Layout.hh b/lang/c++/include/avro/Layout.hh similarity index 92% rename from lang/c++/api/Layout.hh rename to lang/c++/include/avro/Layout.hh index 56d2c1d9c74..cbc05df4a6d 100644 --- a/lang/c++/api/Layout.hh +++ b/lang/c++/include/avro/Layout.hh @@ -20,17 +20,21 @@ #define avro_Layout_hh__ #include "Config.hh" -#include + +#include /// \file Layout.hh /// namespace avro { -class AVRO_DECL Layout : private boost::noncopyable { +class AVRO_DECL Layout { protected: explicit Layout(size_t offset = 0) : offset_(offset) {} + Layout(const Layout &) = delete; + Layout &operator=(const Layout &) = delete; + public: size_t offset() const { return offset_; diff --git a/lang/c++/include/avro/LogicalType.hh b/lang/c++/include/avro/LogicalType.hh new file mode 100644 index 00000000000..0663ad3f6bb --- /dev/null +++ b/lang/c++/include/avro/LogicalType.hh @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_LogicalType_hh__ +#define avro_LogicalType_hh__ + +#include +#include +#include +#include +#include + +#include "Config.hh" +#include + +namespace avro { + +class CustomLogicalType; + +class AVRO_DECL LogicalType { +public: + enum Type { + NONE, + BIG_DECIMAL, + DECIMAL, + DATE, + TIME_MILLIS, + TIME_MICROS, + TIMESTAMP_MILLIS, + TIMESTAMP_MICROS, + TIMESTAMP_NANOS, + LOCAL_TIMESTAMP_MILLIS, + LOCAL_TIMESTAMP_MICROS, + LOCAL_TIMESTAMP_NANOS, + DURATION, + UUID, + CUSTOM // for registered custom logical types + }; + + explicit LogicalType(Type type); + explicit LogicalType(std::shared_ptr custom); + + Type type() const; + + // Precision and scale can only be set for the DECIMAL logical type. + // Precision must be positive and scale must be either positive or zero. The + // setters will throw an exception if they are called on any type other + // than DECIMAL. + void setPrecision(int32_t precision); + int32_t precision() const { return precision_; } + void setScale(int32_t scale); + int32_t scale() const { return scale_; } + + const std::shared_ptr &customLogicalType() const { + return custom_; + } + + void printJson(std::ostream &os) const; + +private: + Type type_; + int32_t precision_; + int32_t scale_; + std::shared_ptr custom_; +}; + +class AVRO_DECL CustomLogicalType { +public: + CustomLogicalType(const std::string &name) : name_(name) {} + + virtual ~CustomLogicalType() = default; + + const std::string &name() const { return name_; } + + virtual void printJson(std::ostream &os) const; + +private: + std::string name_; +}; + +// Registry for custom logical types. +// This class is thread-safe. +class AVRO_DECL CustomLogicalTypeRegistry { +public: + static CustomLogicalTypeRegistry &instance(); + + using Factory = std::function(const std::string &json)>; + + // Register a custom logical type and its factory function. + void registerType(const std::string &name, Factory factory); + + // Create a custom logical type from a JSON string. + // Returns nullptr if the name is not registered. + std::shared_ptr create(const std::string &name, const std::string &json) const; + +private: + CustomLogicalTypeRegistry() = default; + + std::unordered_map registry_; + mutable std::mutex mutex_; +}; + +} // namespace avro + +#endif diff --git a/lang/c++/api/Node.hh b/lang/c++/include/avro/Node.hh similarity index 80% rename from lang/c++/api/Node.hh rename to lang/c++/include/avro/Node.hh index 3c9389da50a..72e85e0ee4d 100644 --- a/lang/c++/api/Node.hh +++ b/lang/c++/include/avro/Node.hh @@ -21,11 +21,12 @@ #include "Config.hh" -#include #include #include #include +#include +#include "CustomAttributes.hh" #include "Exception.hh" #include "LogicalType.hh" #include "SchemaResolution.hh" @@ -39,30 +40,38 @@ class GenericDatum; using NodePtr = std::shared_ptr; class AVRO_DECL Name { + struct Aliases; + std::string ns_; std::string simpleName_; + std::unique_ptr aliases_; public: - Name() = default; - explicit Name(const std::string &fullname); - Name(std::string simpleName, std::string ns) : ns_(std::move(ns)), simpleName_(std::move(simpleName)) { check(); } + Name(); + explicit Name(const std::string &name); + Name(std::string simpleName, std::string ns); + Name(const Name &other); + Name &operator=(const Name &other); + Name(Name &&other); + Name &operator=(Name &&other); + ~Name(); std::string fullname() const; const std::string &ns() const { return ns_; } const std::string &simpleName() const { return simpleName_; } + const std::vector &aliases() const; void ns(std::string n) { ns_ = std::move(n); } void simpleName(std::string n) { simpleName_ = std::move(n); } void fullname(const std::string &n); + void addAlias(const std::string &alias); bool operator<(const Name &n) const; void check() const; bool operator==(const Name &n) const; bool operator!=(const Name &n) const { return !((*this) == n); } - void clear() { - ns_.clear(); - simpleName_.clear(); - } + bool equalOrAliasedBy(const Name &n) const; + void clear(); explicit operator std::string() const { return fullname(); } @@ -87,12 +96,15 @@ inline std::ostream &operator<<(std::ostream &os, const Name &n) { /// different node types. /// -class AVRO_DECL Node : private boost::noncopyable { +class AVRO_DECL Node { public: explicit Node(Type type) : type_(type), logicalType_(LogicalType::NONE), locked_(false) {} + Node(const Node &) = delete; + Node &operator=(const Node &) = delete; + virtual ~Node(); Type type() const { @@ -135,7 +147,7 @@ public: virtual size_t leaves() const = 0; virtual const NodePtr &leafAt(size_t index) const = 0; virtual const GenericDatum &defaultValueAt(size_t index) { - throw Exception(boost::format("No default value at: %1%") % index); + throw Exception("No default value at: {}", index); } void addName(const std::string &name) { @@ -153,6 +165,14 @@ public: } virtual size_t fixedSize() const = 0; + void addCustomAttributesForField(const CustomAttributes &customAttributes) { + checkLock(); + doAddCustomAttribute(customAttributes); + } + + virtual size_t customAttributes() const = 0; + virtual const CustomAttributes &customAttributesAt(size_t index) const = 0; + virtual bool isValid() const = 0; virtual SchemaResolution resolve(const Node &reader) const = 0; @@ -185,6 +205,7 @@ protected: virtual void doAddLeaf(const NodePtr &newLeaf) = 0; virtual void doAddName(const std::string &name) = 0; virtual void doSetFixedSize(size_t size) = 0; + virtual void doAddCustomAttribute(const CustomAttributes &customAttributes) = 0; private: const Type type_; @@ -201,4 +222,12 @@ inline std::ostream &operator<<(std::ostream &os, const avro::Node &n) { } } // namespace std +template<> +struct fmt::formatter : fmt::formatter { + template + constexpr auto format(const avro::Name &n, FormatContext &ctx) const { + return fmt::formatter::format(n.fullname(), ctx); + } +}; + #endif diff --git a/lang/c++/api/NodeConcepts.hh b/lang/c++/include/avro/NodeConcepts.hh similarity index 98% rename from lang/c++/api/NodeConcepts.hh rename to lang/c++/include/avro/NodeConcepts.hh index 155c8ed9193..2f219cd94cc 100644 --- a/lang/c++/api/NodeConcepts.hh +++ b/lang/c++/include/avro/NodeConcepts.hh @@ -159,11 +159,11 @@ private: template struct NameIndexConcept { - bool lookup(const std::string &name, size_t &index) const { + bool lookup(const std::string &, size_t &) const { throw Exception("Name index does not exist"); } - bool add(const ::std::string &name, size_t) { + bool add(const ::std::string &, size_t) { throw Exception("Name index does not exist"); } }; diff --git a/lang/c++/api/NodeImpl.hh b/lang/c++/include/avro/NodeImpl.hh similarity index 80% rename from lang/c++/api/NodeImpl.hh rename to lang/c++/include/avro/NodeImpl.hh index c74d39e6b8b..2038c9d5ce7 100644 --- a/lang/c++/api/NodeImpl.hh +++ b/lang/c++/include/avro/NodeImpl.hh @@ -30,6 +30,7 @@ #include #include +#include "CustomAttributes.hh" #include "Node.hh" #include "NodeConcepts.hh" @@ -42,6 +43,7 @@ template< class NameConcept, class LeavesConcept, class LeafNamesConcept, + class MultiAttributesConcept, class SizeConcept> class NodeImpl : public Node { @@ -51,17 +53,20 @@ protected: docAttribute_(), leafAttributes_(), leafNameAttributes_(), + customAttributes_(), sizeAttribute_() {} NodeImpl(Type type, const NameConcept &name, const LeavesConcept &leaves, const LeafNamesConcept &leafNames, + const MultiAttributesConcept &customAttributes, const SizeConcept &size) : Node(type), nameAttribute_(name), docAttribute_(), leafAttributes_(leaves), leafNameAttributes_(leafNames), + customAttributes_(customAttributes), sizeAttribute_(size) {} // Ctor with "doc" @@ -70,11 +75,13 @@ protected: const concepts::SingleAttribute &doc, const LeavesConcept &leaves, const LeafNamesConcept &leafNames, + const MultiAttributesConcept &customAttributes, const SizeConcept &size) : Node(type), nameAttribute_(name), docAttribute_(doc), leafAttributes_(leaves), leafNameAttributes_(leafNames), + customAttributes_(customAttributes), sizeAttribute_(size) {} void swap(NodeImpl &impl) { @@ -83,6 +90,7 @@ protected: std::swap(leafAttributes_, impl.leafAttributes_); std::swap(leafNameAttributes_, impl.leafNameAttributes_); std::swap(sizeAttribute_, impl.sizeAttribute_); + std::swap(customAttributes_, impl.customAttributes_); std::swap(nameIndex_, impl.nameIndex_); } @@ -121,7 +129,7 @@ protected: void doAddName(const std::string &name) override { if (!nameIndex_.add(name, leafNameAttributes_.size())) { - throw Exception(boost::format("Cannot add duplicate name: %1%") % name); + throw Exception("Cannot add duplicate name: {}", name); } leafNameAttributes_.add(name); } @@ -152,6 +160,22 @@ protected: void setLeafToSymbolic(size_t index, const NodePtr &node) override; + void doAddCustomAttribute(const CustomAttributes &customAttributes) override { + customAttributes_.add(customAttributes); + } + + size_t customAttributes() const override { + return customAttributes_.size(); + } + + const CustomAttributes &customAttributesAt(size_t index) const override { + if (index >= customAttributes_.size()) { + throw Exception("Custom attribute index {} is out of bounds for size {}", + index, customAttributes_.size()); + } + return customAttributes_.get(index); + } + SchemaResolution furtherResolution(const Node &reader) const { SchemaResolution match = RESOLVE_NO_MATCH; @@ -195,6 +219,7 @@ protected: LeavesConcept leafAttributes_; LeafNamesConcept leafNameAttributes_; + MultiAttributesConcept customAttributes_; SizeConcept sizeAttribute_; concepts::NameIndexConcept nameIndex_; }; @@ -210,19 +235,21 @@ using MultiLeaves = concepts::MultiAttribute; using NoLeafNames = concepts::NoAttribute; using LeafNames = concepts::MultiAttribute; +using MultiAttributes = concepts::MultiAttribute; +using NoAttributes = concepts::NoAttribute; -using NoSize = concepts::NoAttribute; -using HasSize = concepts::SingleAttribute; +using NoSize = concepts::NoAttribute; +using HasSize = concepts::SingleAttribute; -using NodeImplPrimitive = NodeImpl; -using NodeImplSymbolic = NodeImpl; +using NodeImplPrimitive = NodeImpl; +using NodeImplSymbolic = NodeImpl; -using NodeImplRecord = NodeImpl; -using NodeImplEnum = NodeImpl; -using NodeImplArray = NodeImpl; -using NodeImplMap = NodeImpl; -using NodeImplUnion = NodeImpl; -using NodeImplFixed = NodeImpl; +using NodeImplRecord = NodeImpl; +using NodeImplEnum = NodeImpl; +using NodeImplArray = NodeImpl; +using NodeImplMap = NodeImpl; +using NodeImplUnion = NodeImpl; +using NodeImplFixed = NodeImpl; class AVRO_DECL NodePrimitive : public NodeImplPrimitive { public: @@ -245,9 +272,9 @@ class AVRO_DECL NodeSymbolic : public NodeImplSymbolic { public: NodeSymbolic() : NodeImplSymbolic(AVRO_SYMBOLIC) {} - explicit NodeSymbolic(const HasName &name) : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoSize()) {} + explicit NodeSymbolic(const HasName &name) : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoAttributes(), NoSize()) {} - NodeSymbolic(const HasName &name, const NodePtr &n) : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoSize()), actualNode_(n) {} + NodeSymbolic(const HasName &name, const NodePtr &n) : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoAttributes(), NoSize()), actualNode_(n) {} SchemaResolution resolve(const Node &reader) const override; void printJson(std::ostream &os, size_t depth) const override; @@ -265,7 +292,7 @@ public: NodePtr getNode() const { NodePtr node = actualNode_.lock(); if (!node) { - throw Exception(boost::format("Could not follow symbol %1%") % name()); + throw Exception("Could not follow symbol {}", name()); } return node; } @@ -279,30 +306,30 @@ protected: }; class AVRO_DECL NodeRecord : public NodeImplRecord { - std::vector defaultValues; + std::vector> fieldsAliases_; + std::vector fieldsDefaultValues_; public: NodeRecord() : NodeImplRecord(AVRO_RECORD) {} + NodeRecord(const HasName &name, const MultiLeaves &fields, - const LeafNames &fieldsNames, - std::vector dv); + const LeafNames &fieldsNames, std::vector dv); NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields, - const LeafNames &fieldsNames, - std::vector dv) : NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, NoSize()), - defaultValues(std::move(dv)) { - for (size_t i = 0; i < leafNameAttributes_.size(); ++i) { - if (!nameIndex_.add(leafNameAttributes_.get(i), i)) { - throw Exception(boost::format( - "Cannot add duplicate field: %1%") - % leafNameAttributes_.get(i)); - } - } - } + const LeafNames &fieldsNames, std::vector dv); + + NodeRecord(const HasName &name, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector> fieldsAliases, + std::vector dv, const MultiAttributes &customAttributes); + + NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector> fieldsAliases, + std::vector dv, const MultiAttributes &customAttributes); void swap(NodeRecord &r) { NodeImplRecord::swap(r); - defaultValues.swap(r.defaultValues); + fieldsAliases_.swap(r.fieldsAliases_); + fieldsDefaultValues_.swap(r.fieldsDefaultValues_); } SchemaResolution resolve(const Node &reader) const override; @@ -310,11 +337,11 @@ public: void printJson(std::ostream &os, size_t depth) const override; bool isValid() const override { - return ((nameAttribute_.size() == 1) && (leafAttributes_.size() == leafNameAttributes_.size())); + return ((nameAttribute_.size() == 1) && (leafAttributes_.size() == leafNameAttributes_.size()) && (customAttributes_.size() == 0 || customAttributes_.size() == leafAttributes_.size())); } const GenericDatum &defaultValueAt(size_t index) override { - return defaultValues[index]; + return fieldsDefaultValues_[index]; } void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override; @@ -324,10 +351,10 @@ class AVRO_DECL NodeEnum : public NodeImplEnum { public: NodeEnum() : NodeImplEnum(AVRO_ENUM) {} - NodeEnum(const HasName &name, const LeafNames &symbols) : NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoSize()) { + NodeEnum(const HasName &name, const LeafNames &symbols) : NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, {}, NoSize()) { for (size_t i = 0; i < leafNameAttributes_.size(); ++i) { if (!nameIndex_.add(leafNameAttributes_.get(i), i)) { - throw Exception(boost::format("Cannot add duplicate enum: %1%") % leafNameAttributes_.get(i)); + throw Exception("Cannot add duplicate enum: {}", leafNameAttributes_.get(i)); } } } @@ -348,7 +375,7 @@ class AVRO_DECL NodeArray : public NodeImplArray { public: NodeArray() : NodeImplArray(AVRO_ARRAY) {} - explicit NodeArray(const SingleLeaf &items) : NodeImplArray(AVRO_ARRAY, NoName(), items, NoLeafNames(), NoSize()) {} + explicit NodeArray(const SingleLeaf &items) : NodeImplArray(AVRO_ARRAY, NoName(), items, NoLeafNames(), {}, NoSize()) {} SchemaResolution resolve(const Node &reader) const override; @@ -365,7 +392,7 @@ class AVRO_DECL NodeMap : public NodeImplMap { public: NodeMap(); - explicit NodeMap(const SingleLeaf &values) : NodeImplMap(AVRO_MAP, NoName(), MultiLeaves(values), NoLeafNames(), NoSize()) { + explicit NodeMap(const SingleLeaf &values) : NodeImplMap(AVRO_MAP, NoName(), MultiLeaves(values), NoLeafNames(), {}, NoSize()) { // need to add the key for the map too NodePtr key(new NodePrimitive(AVRO_STRING)); doAddLeaf(key); @@ -389,7 +416,7 @@ class AVRO_DECL NodeUnion : public NodeImplUnion { public: NodeUnion() : NodeImplUnion(AVRO_UNION) {} - explicit NodeUnion(const MultiLeaves &types) : NodeImplUnion(AVRO_UNION, NoName(), types, NoLeafNames(), NoSize()) {} + explicit NodeUnion(const MultiLeaves &types) : NodeImplUnion(AVRO_UNION, NoName(), types, NoLeafNames(), NoAttributes(), NoSize()) {} SchemaResolution resolve(const Node &reader) const override; @@ -458,7 +485,7 @@ class AVRO_DECL NodeFixed : public NodeImplFixed { public: NodeFixed() : NodeImplFixed(AVRO_FIXED) {} - NodeFixed(const HasName &name, const HasSize &size) : NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), size) {} + NodeFixed(const HasName &name, const HasSize &size) : NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), {}, size) {} SchemaResolution resolve(const Node &reader) const override; @@ -472,9 +499,9 @@ public: void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override; }; -template +template inline void -NodeImpl::setLeafToSymbolic(size_t index, const NodePtr &node) { +NodeImpl::setLeafToSymbolic(size_t index, const NodePtr &node) { if (!B::hasAttribute) { throw Exception("Cannot change leaf node for nonexistent leaf"); } @@ -490,21 +517,21 @@ NodeImpl::setLeafToSymbolic(size_t index, const NodePtr &node) { replaceNode = symbol; } -template +template inline void -NodeImpl::printBasicInfo(std::ostream &os) const { +NodeImpl::printBasicInfo(std::ostream &os) const { os << type(); if (hasName()) { os << ' ' << nameAttribute_.get(); } - if (D::hasAttribute) { + if (E::hasAttribute) { os << " " << sizeAttribute_.get(); } os << '\n'; - int count = leaves(); + size_t count = leaves(); count = count ? count : names(); - for (int i = 0; i < count; ++i) { + for (size_t i = 0; i < count; ++i) { if (C::hasAttribute) { os << "name " << nameAt(i) << '\n'; } diff --git a/lang/c++/api/Parser.hh b/lang/c++/include/avro/Parser.hh similarity index 97% rename from lang/c++/api/Parser.hh rename to lang/c++/include/avro/Parser.hh index f6bc74874cc..3a6c3f137e0 100644 --- a/lang/c++/api/Parser.hh +++ b/lang/c++/include/avro/Parser.hh @@ -32,7 +32,7 @@ namespace avro { /// template -class Parser : private boost::noncopyable { +class Parser { public: // Constructor only works with Writer @@ -41,6 +41,9 @@ public: /// Constructor only works with ValidatingWriter Parser(const ValidSchema &schema, const InputBuffer &in) : reader_(schema, in) {} + Parser(const Parser &) = delete; + Parser &operator=(const Parser &) = delete; + void readNull() { Null null; reader_.readValue(null); diff --git a/lang/c++/api/Reader.hh b/lang/c++/include/avro/Reader.hh similarity index 94% rename from lang/c++/api/Reader.hh rename to lang/c++/include/avro/Reader.hh index ca6a719e31c..604527c0129 100644 --- a/lang/c++/api/Reader.hh +++ b/lang/c++/include/avro/Reader.hh @@ -20,7 +20,6 @@ #define avro_Reader_hh__ #include -#include #include #include @@ -38,7 +37,7 @@ namespace avro { /// template -class ReaderImpl : private boost::noncopyable { +class ReaderImpl { public: explicit ReaderImpl(const InputBuffer &buffer) : reader_(buffer) {} @@ -46,6 +45,9 @@ public: ReaderImpl(const ValidSchema &schema, const InputBuffer &buffer) : validator_(schema), reader_(buffer) {} + ReaderImpl(const ReaderImpl &) = delete; + ReaderImpl &operator=(const ReaderImpl &) = delete; + void readValue(Null &) { validator_.checkTypeExpected(AVRO_NULL); } @@ -84,7 +86,7 @@ public: union { double d; uint64_t i; - } v; + } v = {0}; reader_.read(v.i); val = v.d; } @@ -176,15 +178,15 @@ private: return encoded; } - int64_t readSize() { + size_t readSize() { uint64_t encoded = readVarInt(); - int64_t size = decodeZigzag64(encoded); + auto size = static_cast(decodeZigzag64(encoded)); return size; } - int64_t readCount() { + size_t readCount() { validator_.checkTypeExpected(AVRO_LONG); - int64_t count = readSize(); + size_t count = readSize(); validator_.setCount(count); return count; } diff --git a/lang/c++/api/Resolver.hh b/lang/c++/include/avro/Resolver.hh similarity index 89% rename from lang/c++/api/Resolver.hh rename to lang/c++/include/avro/Resolver.hh index 06c33e76c68..e1fc00d296a 100644 --- a/lang/c++/api/Resolver.hh +++ b/lang/c++/include/avro/Resolver.hh @@ -19,7 +19,6 @@ #ifndef avro_Resolver_hh__ #define avro_Resolver_hh__ -#include #include #include @@ -34,8 +33,12 @@ namespace avro { class ValidSchema; class Layout; -class AVRO_DECL Resolver : private boost::noncopyable { +class AVRO_DECL Resolver { public: + Resolver() = default; + Resolver(const Resolver &) = delete; + Resolver &operator=(const Resolver &) = delete; + virtual void parse(Reader &reader, uint8_t *address) const = 0; virtual ~Resolver() = default; }; diff --git a/lang/c++/api/ResolverSchema.hh b/lang/c++/include/avro/ResolverSchema.hh similarity index 97% rename from lang/c++/api/ResolverSchema.hh rename to lang/c++/include/avro/ResolverSchema.hh index d641d08f8c9..25c113fbe6f 100644 --- a/lang/c++/api/ResolverSchema.hh +++ b/lang/c++/include/avro/ResolverSchema.hh @@ -19,7 +19,6 @@ #ifndef avro_ResolverSchema_hh__ #define avro_ResolverSchema_hh__ -#include #include #include diff --git a/lang/c++/api/ResolvingReader.hh b/lang/c++/include/avro/ResolvingReader.hh similarity index 90% rename from lang/c++/api/ResolvingReader.hh rename to lang/c++/include/avro/ResolvingReader.hh index c7aed39743c..5ced210dbad 100644 --- a/lang/c++/api/ResolvingReader.hh +++ b/lang/c++/include/avro/ResolvingReader.hh @@ -19,7 +19,6 @@ #ifndef avro_ResolvingReader_hh__ #define avro_ResolvingReader_hh__ -#include #include #include "Config.hh" @@ -28,11 +27,13 @@ namespace avro { -class AVRO_DECL ResolvingReader : private boost::noncopyable { +class AVRO_DECL ResolvingReader { public: ResolvingReader(const ResolverSchema &schema, const InputBuffer &in) : reader_(in), schema_(schema) {} + ResolvingReader(const ResolvingReader &) = delete; + ResolvingReader &operator=(const ResolvingReader &) = delete; template void parse(T &object) { diff --git a/lang/c++/api/Schema.hh b/lang/c++/include/avro/Schema.hh similarity index 94% rename from lang/c++/api/Schema.hh rename to lang/c++/include/avro/Schema.hh index abd646f9fc7..6eec0e8b6e4 100644 --- a/lang/c++/api/Schema.hh +++ b/lang/c++/include/avro/Schema.hh @@ -20,6 +20,7 @@ #define avro_Schema_hh__ #include "Config.hh" +#include "CustomAttributes.hh" #include "NodeImpl.hh" #include @@ -100,6 +101,9 @@ class AVRO_DECL RecordSchema : public Schema { public: explicit RecordSchema(const std::string &name); void addField(const std::string &name, const Schema &fieldSchema); + // Add a field with custom attributes + void addField(const std::string &name, const Schema &fieldSchema, + const CustomAttributes &customAttributes); std::string getDoc() const; void setDoc(const std::string &); diff --git a/lang/c++/api/SchemaResolution.hh b/lang/c++/include/avro/SchemaResolution.hh similarity index 100% rename from lang/c++/api/SchemaResolution.hh rename to lang/c++/include/avro/SchemaResolution.hh diff --git a/lang/c++/api/Serializer.hh b/lang/c++/include/avro/Serializer.hh similarity index 95% rename from lang/c++/api/Serializer.hh rename to lang/c++/include/avro/Serializer.hh index 1a2c8e029f9..484a477ca93 100644 --- a/lang/c++/api/Serializer.hh +++ b/lang/c++/include/avro/Serializer.hh @@ -20,7 +20,6 @@ #define avro_Serializer_hh__ #include -#include #include "Config.hh" #include "Writer.hh" @@ -31,7 +30,7 @@ namespace avro { /// explicit write* names instead of writeValue template -class Serializer : private boost::noncopyable { +class Serializer { public: /// Constructor only works with Writer @@ -40,6 +39,9 @@ public: /// Constructor only works with ValidatingWriter explicit Serializer(const ValidSchema &schema) : writer_(schema) {} + Serializer(const Serializer &) = delete; + Serializer &operator=(const Serializer &) = delete; + void writeNull() { writer_.writeValue(Null()); } diff --git a/lang/c++/api/Specific.hh b/lang/c++/include/avro/Specific.hh similarity index 99% rename from lang/c++/api/Specific.hh rename to lang/c++/include/avro/Specific.hh index 247d86da720..fc28b3f5e4b 100644 --- a/lang/c++/api/Specific.hh +++ b/lang/c++/include/avro/Specific.hh @@ -25,8 +25,6 @@ #include #include -#include "boost/blank.hpp" - #include "AvroTraits.hh" #include "Config.hh" #include "Decoder.hh" @@ -48,7 +46,8 @@ */ namespace avro { -typedef boost::blank null; +struct null { +}; template void encode(Encoder &e, const T &t); diff --git a/lang/c++/api/Stream.hh b/lang/c++/include/avro/Stream.hh similarity index 97% rename from lang/c++/api/Stream.hh rename to lang/c++/include/avro/Stream.hh index fe2c97ee2dd..de213404d3f 100644 --- a/lang/c++/api/Stream.hh +++ b/lang/c++/include/avro/Stream.hh @@ -22,8 +22,7 @@ #include #include #include - -#include "boost/utility.hpp" +#include #include "Config.hh" #include "Exception.hh" @@ -33,13 +32,16 @@ namespace avro { /** * A no-copy input stream. */ -class AVRO_DECL InputStream : boost::noncopyable { +class AVRO_DECL InputStream { protected: /** * An empty constructor. */ InputStream() = default; + InputStream(const InputStream &) = delete; + InputStream &operator=(const InputStream &) = delete; + public: /** * Destructor. @@ -105,13 +107,16 @@ typedef std::unique_ptr SeekableInputStreamPtr; /** * A no-copy output stream. */ -class AVRO_DECL OutputStream : boost::noncopyable { +class AVRO_DECL OutputStream { protected: /** * An empty constructor. */ OutputStream() = default; + OutputStream(const OutputStream &) = delete; + OutputStream &operator=(const OutputStream &) = delete; + public: /** * Destructor. diff --git a/lang/c++/api/Types.hh b/lang/c++/include/avro/Types.hh similarity index 92% rename from lang/c++/api/Types.hh rename to lang/c++/include/avro/Types.hh index e3296ae0d00..d02b8006e57 100644 --- a/lang/c++/api/Types.hh +++ b/lang/c++/include/avro/Types.hh @@ -19,6 +19,7 @@ #ifndef avro_Types_hh__ #define avro_Types_hh__ +#include #include #include "Config.hh" @@ -109,4 +110,12 @@ std::ostream &operator<<(std::ostream &os, const Null &null); } // namespace avro +template<> +struct fmt::formatter : fmt::formatter { + template + constexpr auto format(avro::Type t, FormatContext &ctx) const { + return fmt::formatter::format(avro::toString(t), ctx); + } +}; + #endif diff --git a/lang/c++/api/ValidSchema.hh b/lang/c++/include/avro/ValidSchema.hh similarity index 100% rename from lang/c++/api/ValidSchema.hh rename to lang/c++/include/avro/ValidSchema.hh diff --git a/lang/c++/api/Validator.hh b/lang/c++/include/avro/Validator.hh similarity index 76% rename from lang/c++/api/Validator.hh rename to lang/c++/include/avro/Validator.hh index ab5d068df0b..76b74764d80 100644 --- a/lang/c++/api/Validator.hh +++ b/lang/c++/include/avro/Validator.hh @@ -19,7 +19,6 @@ #ifndef avro_Validating_hh__ #define avro_Validating_hh__ -#include #include #include #include @@ -30,12 +29,15 @@ namespace avro { -class AVRO_DECL NullValidator : private boost::noncopyable { +class AVRO_DECL NullValidator { public: - explicit NullValidator(const ValidSchema &schema) {} + explicit NullValidator(const ValidSchema &) {} NullValidator() = default; - void setCount(int64_t) {} + NullValidator(const NullValidator &) = delete; + NullValidator &operator=(const NullValidator &) = delete; + + void setCount(size_t) {} static bool typeIsExpected(Type) { return true; @@ -45,20 +47,20 @@ public: return AVRO_UNKNOWN; } - static int nextSizeExpected() { + static size_t nextSizeExpected() { return 0; } - static bool getCurrentRecordName(std::string &name) { + static bool getCurrentRecordName(std::string &) { return true; } - static bool getNextFieldName(std::string &name) { + static bool getNextFieldName(std::string &) { return true; } void checkTypeExpected(Type) {} - void checkFixedSizeExpected(int) {} + void checkFixedSizeExpected(size_t) {} }; /// This class is used by both the ValidatingSerializer and ValidationParser @@ -67,11 +69,14 @@ public: /// through all leaf nodes but a union only skips to one), and reports which /// type is next. -class AVRO_DECL Validator : private boost::noncopyable { +class AVRO_DECL Validator { public: explicit Validator(ValidSchema schema); - void setCount(int64_t val); + Validator(const Validator &) = delete; + Validator &operator=(const Validator &) = delete; + + void setCount(size_t val); bool typeIsExpected(Type type) const { return (expectedTypesFlag_ & typeToFlag(type)) != 0; @@ -81,25 +86,21 @@ public: return nextType_; } - int nextSizeExpected() const; + size_t nextSizeExpected() const; bool getCurrentRecordName(std::string &name) const; bool getNextFieldName(std::string &name) const; void checkTypeExpected(Type type) { if (!typeIsExpected(type)) { - throw Exception( - boost::format("Type %1% does not match schema %2%") - % type % nextType_); + throw Exception("Type {} does not match schema {}", type, nextType_); } advance(); } - void checkFixedSizeExpected(int size) { + void checkFixedSizeExpected(size_t size) { if (nextSizeExpected() != size) { - throw Exception( - boost::format("Wrong size for fixed, got %1%, expected %2%") - % size % nextSizeExpected()); + throw Exception("Wrong size for fixed, got {}, expected {}", size, nextSizeExpected()); } checkTypeExpected(AVRO_FIXED); } @@ -108,7 +109,7 @@ private: using flag_t = uint32_t; static flag_t typeToFlag(Type type) { - flag_t flag = (1L << type); + flag_t flag = 1u << static_cast(type); return flag; } @@ -133,7 +134,7 @@ private: flag_t expectedTypesFlag_; bool compoundStarted_; bool waitingForCount_; - int64_t count_; + size_t count_; struct CompoundType { explicit CompoundType(NodePtr n) : node(std::move(n)), pos(0) {} diff --git a/lang/c++/api/Writer.hh b/lang/c++/include/avro/Writer.hh similarity index 97% rename from lang/c++/api/Writer.hh rename to lang/c++/include/avro/Writer.hh index 930ea398c44..426f8ba0ed0 100644 --- a/lang/c++/api/Writer.hh +++ b/lang/c++/include/avro/Writer.hh @@ -20,7 +20,6 @@ #define avro_Writer_hh__ #include -#include #include "Config.hh" #include "Types.hh" @@ -33,13 +32,16 @@ namespace avro { /// Class for writing avro data to a stream. template -class WriterImpl : private boost::noncopyable { +class WriterImpl { public: WriterImpl() = default; explicit WriterImpl(const ValidSchema &schema) : validator_(schema) {} + WriterImpl(const WriterImpl &) = delete; + WriterImpl &operator=(const WriterImpl &) = delete; + void writeValue(const Null &) { validator_.checkTypeExpected(AVRO_NULL); } diff --git a/lang/c++/api/Zigzag.hh b/lang/c++/include/avro/Zigzag.hh similarity index 90% rename from lang/c++/api/Zigzag.hh rename to lang/c++/include/avro/Zigzag.hh index fefdc3f32e7..5d20e028b2d 100644 --- a/lang/c++/api/Zigzag.hh +++ b/lang/c++/include/avro/Zigzag.hh @@ -30,16 +30,14 @@ namespace avro { AVRO_DECL constexpr uint64_t encodeZigzag64(int64_t input) noexcept { - // cppcheck-suppress shiftTooManyBitsSigned - return ((input << 1) ^ (input >> 63)); + return ((static_cast(input) << 1) ^ (input >> 63)); } AVRO_DECL constexpr int64_t decodeZigzag64(uint64_t input) noexcept { return static_cast(((input >> 1) ^ -(static_cast(input) & 1))); } AVRO_DECL constexpr uint32_t encodeZigzag32(int32_t input) noexcept { - // cppcheck-suppress shiftTooManyBitsSigned - return ((input << 1) ^ (input >> 31)); + return (static_cast(input) << 1) ^ (input >> 31); } AVRO_DECL constexpr int32_t decodeZigzag32(uint32_t input) noexcept { return static_cast(((input >> 1) ^ -(static_cast(input) & 1))); diff --git a/lang/c++/api/buffer/Buffer.hh b/lang/c++/include/avro/buffer/Buffer.hh similarity index 98% rename from lang/c++/api/buffer/Buffer.hh rename to lang/c++/include/avro/buffer/Buffer.hh index bc3baf12330..16a22ef626e 100644 --- a/lang/c++/api/buffer/Buffer.hh +++ b/lang/c++/include/avro/buffer/Buffer.hh @@ -145,7 +145,7 @@ public: **/ size_type wroteTo(size_type size) { - int wrote = 0; + size_type wrote = 0; if (size) { if (size > freeSpace()) { throw std::length_error("Impossible to write more data than free space"); @@ -276,7 +276,7 @@ public: * Returns the number of chunks that contain free space. **/ - int numChunks() const { + size_t numChunks() const { return pimpl_->numFreeChunks(); } @@ -284,7 +284,7 @@ public: * Returns the number of chunks that contain data **/ - int numDataChunks() const { + size_t numDataChunks() const { return pimpl_->numDataChunks(); } @@ -384,7 +384,7 @@ public: * Returns the number of chunks containing data. **/ - int numChunks() const { + size_t numChunks() const { return pimpl_->numDataChunks(); } @@ -476,10 +476,10 @@ inline InputBuffer OutputBuffer::extractData(size_type bytes) { template inline void toIovec(BufferType &buf, std::vector &iov) { - const int chunks = buf.numChunks(); + const size_t chunks = buf.numChunks(); iov.resize(chunks); typename BufferType::const_iterator iter = buf.begin(); - for (int i = 0; i < chunks; ++i) { + for (size_t i = 0; i < chunks; ++i) { iov[i].iov_base = const_cast(iter->data()); iov[i].iov_len = iter->size(); ++iter; diff --git a/lang/c++/api/buffer/BufferPrint.hh b/lang/c++/include/avro/buffer/BufferPrint.hh similarity index 99% rename from lang/c++/api/buffer/BufferPrint.hh rename to lang/c++/include/avro/buffer/BufferPrint.hh index c8eb15b719a..8d4001529c9 100644 --- a/lang/c++/api/buffer/BufferPrint.hh +++ b/lang/c++/include/avro/buffer/BufferPrint.hh @@ -47,7 +47,7 @@ hexPrint(std::ostream &os, BufferReader &reader) { std::ios_base::fmtflags savedFlags = os.flags(); char sixteenBytes[16]; - int offset = 0; + size_t offset = 0; os << std::setfill('0'); os << std::hex; diff --git a/lang/c++/api/buffer/BufferReader.hh b/lang/c++/include/avro/buffer/BufferReader.hh similarity index 97% rename from lang/c++/api/buffer/BufferReader.hh rename to lang/c++/include/avro/buffer/BufferReader.hh index 7f49518e64b..a08dc251207 100644 --- a/lang/c++/api/buffer/BufferReader.hh +++ b/lang/c++/include/avro/buffer/BufferReader.hh @@ -20,6 +20,7 @@ #define avro_BufferReader_hh__ #include "Buffer.hh" +#include #include #ifdef min @@ -40,7 +41,7 @@ namespace avro { * chunk boundaries. May read from an InputBuffer or OutputBuffer. * **/ -class AVRO_DECL BufferReader : private boost::noncopyable { +class AVRO_DECL BufferReader { public: typedef detail::data_type data_type; @@ -83,6 +84,9 @@ public: bytesRemaining_(bytes_), chunkPos_(0) {} + BufferReader(const BufferReader &) = delete; + BufferReader &operator=(const BufferReader &) = delete; + /** * How many bytes are still not read from this buffer. **/ @@ -230,7 +234,7 @@ private: } if (sizeof(T) <= chunkRemaining()) { - val = *(reinterpret_cast(addr())); + memcpy(&val, addr(), sizeof(T)); incrementChunk(sizeof(T)); } else { read(reinterpret_cast(&val), sizeof(T)); diff --git a/lang/c++/api/buffer/BufferStream.hh b/lang/c++/include/avro/buffer/BufferStream.hh similarity index 96% rename from lang/c++/api/buffer/BufferStream.hh rename to lang/c++/include/avro/buffer/BufferStream.hh index a8510adaa1c..3aeda37f0f7 100644 --- a/lang/c++/api/buffer/BufferStream.hh +++ b/lang/c++/include/avro/buffer/BufferStream.hh @@ -35,7 +35,7 @@ namespace avro { * **/ -class AVRO_DECL ostream : public std::ostream { +class ostream : public std::ostream { public: /// Default constructor, creates a new OutputBuffer. @@ -65,7 +65,7 @@ protected: * **/ -class AVRO_DECL istream : public std::istream { +class istream : public std::istream { public: /// Constructor, requires an InputBuffer to read from. diff --git a/lang/c++/api/buffer/BufferStreambuf.hh b/lang/c++/include/avro/buffer/BufferStreambuf.hh similarity index 95% rename from lang/c++/api/buffer/BufferStreambuf.hh rename to lang/c++/include/avro/buffer/BufferStreambuf.hh index 2b7aea4d779..38cc91fce62 100644 --- a/lang/c++/api/buffer/BufferStreambuf.hh +++ b/lang/c++/include/avro/buffer/BufferStreambuf.hh @@ -41,7 +41,7 @@ namespace avro { * but we have no need since all writes are immediately stored in the buffer. **/ -class AVRO_DECL ostreambuf : public std::streambuf { +class ostreambuf : public std::streambuf { public: /// Default constructor creates a new OutputBuffer. @@ -86,7 +86,7 @@ private: * **/ -class AVRO_DECL istreambuf : public std::streambuf { +class istreambuf : public std::streambuf { public: /// Default constructor requires an InputBuffer to read from. @@ -135,7 +135,11 @@ protected: memcpy(c, gptr(), toCopy); c += toCopy; bytesCopied += toCopy; - gbump(toCopy); + while (toCopy > static_cast(std::numeric_limits::max())) { + gbump(std::numeric_limits::max()); + toCopy -= static_cast(std::numeric_limits::max()); + } + gbump(static_cast(toCopy)); } if (bytesCopied < len) { diff --git a/lang/c++/api/buffer/detail/BufferDetail.hh b/lang/c++/include/avro/buffer/detail/BufferDetail.hh similarity index 95% rename from lang/c++/api/buffer/detail/BufferDetail.hh rename to lang/c++/include/avro/buffer/detail/BufferDetail.hh index b487cdb3935..30745008fdc 100644 --- a/lang/c++/api/buffer/detail/BufferDetail.hh +++ b/lang/c++/include/avro/buffer/detail/BufferDetail.hh @@ -19,18 +19,13 @@ #ifndef avro_BufferDetail_hh__ #define avro_BufferDetail_hh__ -#include -#include -#include -#include -#include -#include -#ifdef HAVE_BOOST_ASIO -#include -#endif #include +#include #include #include +#include +#include +#include /** * \file BufferDetail.hh @@ -45,17 +40,13 @@ namespace detail { typedef char data_type; typedef size_t size_type; -#ifdef HAVE_BOOST_ASIO -typedef boost::asio::const_buffer ConstAsioBuffer; -typedef boost::asio::mutable_buffer MutableAsioBuffer; -#endif /// The size in bytes for blocks backing buffer chunks. const size_type kMinBlockSize = 4096; const size_type kMaxBlockSize = 16384; const size_type kDefaultBlockSize = kMinBlockSize; -typedef boost::function free_func; +typedef std::function free_func; /** * Simple class to hold a functor that executes on delete @@ -108,7 +99,7 @@ public: private: // reference counted object will call a functor when it's destroyed - boost::shared_ptr callOnDestroy_; + std::shared_ptr callOnDestroy_; public: /// Remove readable bytes from the front of the chunk by advancing the @@ -161,7 +152,7 @@ private: friend bool operator!=(const Chunk &lhs, const Chunk &rhs); // more than one buffer can share an underlying block, so use SharedPtr - boost::shared_array underlyingBlock_; + std::shared_ptr underlyingBlock_; data_type *readPos_; ///< The first readable byte in the block data_type *writePos_; ///< The end of written data and start of free space @@ -191,7 +182,7 @@ inline bool operator!=(const Chunk &lhs, const Chunk &rhs) { * */ -class BufferImpl : boost::noncopyable { +class BufferImpl { /// Add a new chunk to the list of chunks for this buffer, growing the /// buffer by the default block size. @@ -274,8 +265,8 @@ class BufferImpl : boost::noncopyable { public: typedef std::deque ChunkList; - typedef boost::shared_ptr SharedPtr; - typedef boost::shared_ptr ConstSharedPtr; + typedef std::shared_ptr SharedPtr; + typedef std::shared_ptr ConstSharedPtr; /// Default constructor, creates a buffer without any chunks BufferImpl() : freeSpace_(0), @@ -330,7 +321,7 @@ public: if (freeSpace_ && (sizeof(T) <= writeChunks_.front().freeSize())) { // fast path, there's enough room in the writeable chunk to just // straight out copy it - *(reinterpret_cast(writeChunks_.front().tellWritePos())) = val; + memcpy(writeChunks_.front().tellWritePos(), &val, sizeof(T)); postWrite(sizeof(T)); } else { // need to fixup chunks first, so use the regular memcpy @@ -343,7 +334,7 @@ public: /// and will compile-time assert. template void writeTo(T /*val*/, const std::false_type &) { - BOOST_STATIC_ASSERT(sizeof(T) == 0); + static_assert(sizeof(T) == 0); } /// Write a block of data to the buffer, adding new chunks if necessary. @@ -481,13 +472,13 @@ public: } /// The number of chunks containing data. Used for debugging. - int numDataChunks() const { + size_t numDataChunks() const { return readChunks_.size(); } /// The number of chunks containing free space (note that an entire chunk /// may not be free). Used for debugging. - int numFreeChunks() const { + size_t numFreeChunks() const { return writeChunks_.size(); } diff --git a/lang/c++/api/buffer/detail/BufferDetailIterator.hh b/lang/c++/include/avro/buffer/detail/BufferDetailIterator.hh similarity index 89% rename from lang/c++/api/buffer/detail/BufferDetailIterator.hh rename to lang/c++/include/avro/buffer/detail/BufferDetailIterator.hh index 44e35dd0138..d5efbe176ce 100644 --- a/lang/c++/api/buffer/detail/BufferDetailIterator.hh +++ b/lang/c++/include/avro/buffer/detail/BufferDetailIterator.hh @@ -57,15 +57,6 @@ struct InputIteratorHelper { return iter_->dataSize(); } - /// Conversion operator. It doesn't check for null, because the only - /// the only time the chunk should be null is when it's the iterator - /// end(), which should never be dereferenced anyway. -#ifdef HAVE_BOOST_ASIO - operator ConstAsioBuffer() const { - return ConstAsioBuffer(data(), size()); - } -#endif - BufferImpl::ChunkList::const_iterator iter_; ///< the current iterator }; @@ -95,15 +86,6 @@ struct OutputIteratorHelper { return iter_->freeSize(); } - /// Conversion operator. It doesn't check for null, because the only - /// the only time the chunk should be null is when it's the iterator - /// end(), which should never be dereferenced anyway. -#ifdef HAVE_BOOST_ASIO - operator MutableAsioBuffer() const { - return MutableAsioBuffer(data(), size()); - } -#endif - BufferImpl::ChunkList::const_iterator iter_; ///< the current iterator }; diff --git a/lang/c++/jsonschemas/big_union b/lang/c++/jsonschemas/big_union new file mode 100644 index 00000000000..34cced4493b --- /dev/null +++ b/lang/c++/jsonschemas/big_union @@ -0,0 +1,101 @@ +{ + "type": "record", + "doc": "Top level Doc.", + "name": "RootRecord", + "fields": [ + { + "name": "big_union", + "doc": "A large union containing the primitive types, a array, a map and records.", + "type": [ + "null", + "boolean", + "int", + "long", + "float", + "double", + { + "type": "fixed", + "size": 16, + "name": "MD5" + }, + "string", + { + "type": "record", + "name": "Vec2", + "fields": [ + { + "name": "x", + "type": "long" + }, + { + "name": "y", + "type": "long" + } + ] + }, + { + "type": "record", + "name": "Vec3", + "fields": [ + { + "name": "x", + "type": "long" + }, + { + "name": "y", + "type": "long" + }, + { + "name": "z", + "type": "long" + } + ] + }, + { + "type": "enum", + "name": "Suit", + "symbols": [ + "SPADES", + "HEARTS", + "DIAMONDS", + "CLUBS" + ] + }, + { + "type": "array", + "items": "string", + "default": [] + }, + { + "type": "map", + "values": "long", + "default": {} + }, + { + "type": "record", + "name": "int_", + "doc": "try to force a collision with int", + "fields": [] + }, + { + "type": "record", + "name": "int__", + "doc": "try to force a collision with int", + "fields": [] + }, + { + "type": "record", + "name": "Int", + "doc": "name similar to primitive name", + "fields": [] + }, + { + "type": "record", + "name": "_Int", + "doc": "name with underscore as prefix", + "fields": [] + } + ] + } + ] +} diff --git a/lang/c++/jsonschemas/bigrecord b/lang/c++/jsonschemas/bigrecord index af8a5ad39b8..e7fd7fd7b15 100644 --- a/lang/c++/jsonschemas/bigrecord +++ b/lang/c++/jsonschemas/bigrecord @@ -1,6 +1,6 @@ { "type": "record", - "doc": "Top level Doc.", + "doc": "Top level Doc.\nWith multiple lines", "name": "RootRecord", "fields": [ { @@ -10,6 +10,7 @@ }, { "name": "nestedrecord", + "doc": "Doc edge cases\r\nwith trailing backslash\\\t \n\\\n\\ \n\\x", "type": { "type": "record", "name": "Nested", diff --git a/lang/c++/jsonschemas/cpp_reserved_words_union_typedef b/lang/c++/jsonschemas/cpp_reserved_words_union_typedef new file mode 100644 index 00000000000..215f2f4c0fc --- /dev/null +++ b/lang/c++/jsonschemas/cpp_reserved_words_union_typedef @@ -0,0 +1,13 @@ +{ + "type": "record", + "name": "Record", + "fields": [ + { + "name": "void", + "type": [ + "int", + "double" + ] + } + ] +} diff --git a/lang/c++/jsonschemas/union_empty_record b/lang/c++/jsonschemas/union_empty_record new file mode 100644 index 00000000000..5d2523165ff --- /dev/null +++ b/lang/c++/jsonschemas/union_empty_record @@ -0,0 +1,25 @@ +{ + "type": "record", + "name": "StackCalculator", + "fields": [ + { + "name": "stack", + "type": { + "type": "array", + "items": [ + "int", + { + "type": "record", + "name": "Dup", + "fields": [] + }, + { + "type": "record", + "name": "Add", + "fields": [] + } + ] + } + } + ] +} diff --git a/lang/c++/jsonschemas/union_redundant_types b/lang/c++/jsonschemas/union_redundant_types new file mode 100644 index 00000000000..b45b11b3a13 --- /dev/null +++ b/lang/c++/jsonschemas/union_redundant_types @@ -0,0 +1,22 @@ +{ + "type": "record", + "name": "RedundantUnionSchema", + "doc": "Schema to test the generation of redundant union types in avrogencpp", + "fields" : [ + {"name": "null_string_1", "type": ["null", "string"]}, + {"name": "null_string_2", "type": ["null", "string"]}, + {"name": "string_null_1", "type": ["string", "null"]}, + {"name": "string_null_2", "type": ["string", "null"]}, + {"name": "null_string_int", "type": ["string", "null", "int"]}, + {"name": "null_Empty_1", "type": ["null", {"type": "record", "name": "Empty", "fields": []}]}, + {"name": "null_Empty_2", "type": ["null", "Empty"]}, + {"name": "null_namespace_record_1", "type": ["null", {"type": "record", "namespace": "example_namespace", "name": "Record", "fields": []}]}, + {"name": "null_namespace_record_2", "type": ["null", "example_namespace.Record"]}, + {"name": "null_fixed_8", "type": ["null", {"type": "fixed", "size": 8, "name": "fixed_8"}]}, + {"name": "null_fixed_16", "type": ["null", {"type": "fixed", "size": 16, "name": "fixed_16"}]}, + {"name": "fixed_8_fixed_16", "type": ["fixed_8", "fixed_16"]}, + {"name": "null_int_map_1", "type": ["null", {"type": "map", "values": "int"}]}, + {"name": "null_int_map_2", "type": ["null", {"type": "map", "values": "int"}]}, + {"name": "null_long_map", "type": ["null", {"type": "map", "values": "long"}]} + ] +} diff --git a/lang/c++/m4/README b/lang/c++/m4/README deleted file mode 100644 index 6d90a5a133e..00000000000 --- a/lang/c++/m4/README +++ /dev/null @@ -1,3 +0,0 @@ -The macros in this directory came from https://www.nongnu.org/autoconf-archive/index.html - -Please refer to the files for their licensing info. diff --git a/lang/c++/m4/m4_ax_boost_asio.m4 b/lang/c++/m4/m4_ax_boost_asio.m4 deleted file mode 100644 index d0d070b017b..00000000000 --- a/lang/c++/m4/m4_ax_boost_asio.m4 +++ /dev/null @@ -1,108 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_asio.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_ASIO -# -# DESCRIPTION -# -# Test for Asio library from the Boost C++ libraries. The macro requires a -# preceding call to AX_BOOST_BASE. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_ASIO_LIB) -# -# And sets: -# -# HAVE_BOOST_ASIO -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# Copyright (c) 2008 Pete Greenwell -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 7 - -AC_DEFUN([AX_BOOST_ASIO], -[ - AC_ARG_WITH([boost-asio], - AS_HELP_STRING([--with-boost-asio@<:@=special-lib@:>@], - [use the ASIO library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-asio=boost_system-gcc41-mt-1_34 ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_asio_lib="" - else - want_boost="yes" - ax_boost_user_asio_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::ASIO library is available, - ax_cv_boost_asio, - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[ @%:@include - ]], - [[ - - boost::asio::io_service io; - boost::system::error_code timer_result; - boost::asio::deadline_timer t(io); - t.cancel(); - io.run_one(); - return 0; - ]]), - ax_cv_boost_asio=yes, ax_cv_boost_asio=no) - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_asio" = "xyes"; then - AC_DEFINE(HAVE_BOOST_ASIO,,[define if the Boost::ASIO library is available]) - BN=boost_system - if test "x$ax_boost_user_asio_lib" = "x"; then - for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \ - lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \ - $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do - AC_CHECK_LIB($ax_lib, main, [BOOST_ASIO_LIB="-l$ax_lib" AC_SUBST(BOOST_ASIO_LIB) link_thread="yes" break], - [link_thread="no"]) - done - else - for ax_lib in $ax_boost_user_asio_lib $BN-$ax_boost_user_asio_lib; do - AC_CHECK_LIB($ax_lib, main, - [BOOST_ASIO_LIB="-l$ax_lib" AC_SUBST(BOOST_ASIO_LIB) link_asio="yes" break], - [link_asio="no"]) - done - - fi - if test "x$link_asio" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/lang/c++/m4/m4_ax_boost_base.m4 b/lang/c++/m4/m4_ax_boost_base.m4 deleted file mode 100644 index 34f63c751a8..00000000000 --- a/lang/c++/m4/m4_ax_boost_base.m4 +++ /dev/null @@ -1,219 +0,0 @@ -# =========================================================================== -# https://www.nongnu.org/autoconf-archive/ax_boost_base.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_BASE([MINIMUM-VERSION]) -# -# DESCRIPTION -# -# Test for the Boost C++ libraries of a particular version (or newer) -# -# If no path to the installed boost library is given the macro searchs -# under /usr, /usr/local, /opt and /opt/local and evaluates the -# $BOOST_ROOT environment variable. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) -# -# And sets: -# -# HAVE_BOOST -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. - -AC_DEFUN([AX_BOOST_BASE], -[ -AC_ARG_WITH([boost], - AS_HELP_STRING([--with-boost@<:@=DIR@:>@], [use boost (default is yes) - it is possible to specify the root directory for boost (optional)]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ac_boost_path="" - else - want_boost="yes" - ac_boost_path="$withval" - fi - ], - [want_boost="yes"]) - - -AC_ARG_WITH([boost-libdir], - AS_HELP_STRING([--with-boost-libdir=LIB_DIR], - [Force given directory for boost libraries. Note that this will overwrite library path detection, so use this parameter only if default library detection fails and you know exactly where your boost libraries are located.]), - [ - if test -d $withval - then - ac_boost_lib_path="$withval" - else - AC_MSG_ERROR(--with-boost-libdir expected directory name) - fi - ], - [ac_boost_lib_path=""] -) - -if test "x$want_boost" = "xyes"; then - boost_lib_version_req=ifelse([$1], ,1.20.0,$1) - boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'` - boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'` - boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'` - boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` - if test "x$boost_lib_version_req_sub_minor" = "x" ; then - boost_lib_version_req_sub_minor="0" - fi - WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+ $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor` - AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req) - succeeded=no - - dnl first we check the system location for boost libraries - dnl this location ist chosen if boost libraries are installed with the --layout=system option - dnl or if you install boost with RPM - if test "$ac_boost_path" != ""; then - BOOST_LDFLAGS="-L$ac_boost_path/lib" - BOOST_CPPFLAGS="-I$ac_boost_path/include" - else - for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do - if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then - BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib" - BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include" - break; - fi - done - fi - - dnl overwrite ld flags if we have required special directory with - dnl --with-boost-libdir parameter - if test "$ac_boost_lib_path" != ""; then - BOOST_LDFLAGS="-L$ac_boost_lib_path" - fi - - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ - @%:@include - ]], [[ - #if BOOST_VERSION >= $WANT_BOOST_VERSION - // Everything is okay - #else - # error Boost version is too old - #endif - ]])],[ - AC_MSG_RESULT(yes) - succeeded=yes - found_system=yes - ],[ - ]) - AC_LANG_POP([C++]) - - - - dnl if we found no boost with system layout we search for boost libraries - dnl built and installed without the --layout=system option or for a staged(not installed) version - if test "x$succeeded" != "xyes"; then - _version=0 - if test "$ac_boost_path" != ""; then - if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then - for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do - _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` - V_CHECK=`expr $_version_tmp \> $_version` - if test "$V_CHECK" = "1" ; then - _version=$_version_tmp - fi - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` - BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE" - done - fi - else - for ac_boost_path in /usr /usr/local /opt /opt/local ; do - if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then - for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do - _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` - V_CHECK=`expr $_version_tmp \> $_version` - if test "$V_CHECK" = "1" ; then - _version=$_version_tmp - best_path=$ac_boost_path - fi - done - fi - done - - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` - BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" - if test "$ac_boost_lib_path" = "" - then - BOOST_LDFLAGS="-L$best_path/lib" - fi - - if test "x$BOOST_ROOT" != "x"; then - if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/lib" && test -r "$BOOST_ROOT/stage/lib"; then - version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'` - stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'` - stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'` - V_CHECK=`expr $stage_version_shorten \>\= $_version` - if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then - AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT) - BOOST_CPPFLAGS="-I$BOOST_ROOT" - BOOST_LDFLAGS="-L$BOOST_ROOT/stage/lib" - fi - fi - fi - fi - - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ - @%:@include - ]], [[ - #if BOOST_VERSION >= $WANT_BOOST_VERSION - // Everything is okay - #else - # error Boost version is too old - #endif - ]])],[ - AC_MSG_RESULT(yes) - succeeded=yes - found_system=yes - ],[ - ]) - AC_LANG_POP([C++]) - fi - - if test "$succeeded" != "yes" ; then - if test "$_version" = "0" ; then - AC_MSG_ERROR([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in . See https://www.randspringer.de/boost for more documentation.]]) - else - AC_MSG_ERROR([Your boost libraries seems too old (version $_version).]) - fi - else - AC_SUBST(BOOST_CPPFLAGS) - AC_SUBST(BOOST_LDFLAGS) - AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available]) - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" -fi - -]) diff --git a/lang/c++/m4/m4_ax_boost_filesystem.m4 b/lang/c++/m4/m4_ax_boost_filesystem.m4 deleted file mode 100644 index a52ce6ad166..00000000000 --- a/lang/c++/m4/m4_ax_boost_filesystem.m4 +++ /dev/null @@ -1,115 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_filesystem.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_FILESYSTEM -# -# DESCRIPTION -# -# Test for Filesystem library from the Boost C++ libraries. The macro -# requires a preceding call to AX_BOOST_BASE. Further documentation is -# available at . -# -# This macro calls: -# -# AC_SUBST(BOOST_FILESYSTEM_LIB) -# -# And sets: -# -# HAVE_BOOST_FILESYSTEM -# -# LICENSE -# -# Copyright (c) 2009 Thomas Porschberg -# Copyright (c) 2009 Michael Tindal -# Copyright (c) 2009 Roman Rybalko -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 13 - -AC_DEFUN([AX_BOOST_FILESYSTEM], -[ - AC_ARG_WITH([boost-filesystem], - AS_HELP_STRING([--with-boost-filesystem@<:@=special-lib@:>@], - [use the Filesystem library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-filesystem=boost_filesystem-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_filesystem_lib="" - else - want_boost="yes" - ax_boost_user_filesystem_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - LIBS_SAVED=$LIBS - LIBS="$LIBS $BOOST_SYSTEM_LIB" - export LIBS - - AC_CACHE_CHECK(whether the Boost::Filesystem library is available, - ax_cv_boost_filesystem, - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include ]], - [[using namespace boost::filesystem; - path my_path( "foo/bar/data.txt" ); - return 0;]]), - ax_cv_boost_filesystem=yes, ax_cv_boost_filesystem=no) - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_filesystem" = "xyes"; then - AC_DEFINE(HAVE_BOOST_FILESYSTEM,,[define if the Boost::Filesystem library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - if test "x$ax_boost_user_filesystem_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_filesystem*.{so,dylib,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_filesystem.*\)\.so.*$;\1;' -e 's;^lib\(boost_filesystem.*\)\.a*$;\1;' -e 's;^lib\(boost_filesystem.*\)\.dylib$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - if test "x$link_program_options" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_filesystem*.{dll,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_filesystem.*\)\.dll.*$;\1;' -e 's;^\(boost_filesystem.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - fi - else - for ax_lib in $ax_boost_user_filesystem_lib boost_filesystem-$ax_boost_user_filesystem_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - - fi - if test "x$link_filesystem" != "xyes"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - LIBS="$LIBS_SAVED" - fi -]) diff --git a/lang/c++/m4/m4_ax_boost_regex.m4 b/lang/c++/m4/m4_ax_boost_regex.m4 deleted file mode 100644 index 4ec2eda39f9..00000000000 --- a/lang/c++/m4/m4_ax_boost_regex.m4 +++ /dev/null @@ -1,105 +0,0 @@ -# =========================================================================== -# https://www.nongnu.org/autoconf-archive/ax_boost_regex.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_REGEX -# -# DESCRIPTION -# -# Test for Regex library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_REGEX_LIB) -# -# And sets: -# -# HAVE_BOOST_REGEX -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# Copyright (c) 2008 Michael Tindal -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. - -AC_DEFUN([AX_BOOST_REGEX], -[ - AC_ARG_WITH([boost-regex], - AS_HELP_STRING([--with-boost-regex@<:@=special-lib@:>@], - [use the Regex library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-regex=boost_regex-gcc-mt-d-1_33_1 ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_regex_lib="" - else - want_boost="yes" - ax_boost_user_regex_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::Regex library is available, - ax_cv_boost_regex, - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include - ]], - [[boost::regex r(); return 0;]]), - ax_cv_boost_regex=yes, ax_cv_boost_regex=no) - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_regex" = "xyes"; then - AC_DEFINE(HAVE_BOOST_REGEX,,[define if the Boost::Regex library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - if test "x$ax_boost_user_regex_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_regex*.{so,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_regex.*\)\.so.*$;\1;' -e 's;^lib\(boost_regex.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_REGEX_LIB="-l$ax_lib"; AC_SUBST(BOOST_REGEX_LIB) link_regex="yes"; break], - [link_regex="no"]) - done - if test "x$link_regex" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_regex*.{dll,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_regex.*\)\.dll.*$;\1;' -e 's;^\(boost_regex.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_REGEX_LIB="-l$ax_lib"; AC_SUBST(BOOST_REGEX_LIB) link_regex="yes"; break], - [link_regex="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_regex_lib boost_regex-$ax_boost_user_regex_lib; do - AC_CHECK_LIB($ax_lib, main, - [BOOST_REGEX_LIB="-l$ax_lib"; AC_SUBST(BOOST_REGEX_LIB) link_regex="yes"; break], - [link_regex="no"]) - done - fi - if test "x$link_regex" != "xyes"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/lang/c++/m4/m4_ax_boost_system.m4 b/lang/c++/m4/m4_ax_boost_system.m4 deleted file mode 100644 index 3a4cb611b2d..00000000000 --- a/lang/c++/m4/m4_ax_boost_system.m4 +++ /dev/null @@ -1,117 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_system.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_SYSTEM -# -# DESCRIPTION -# -# Test for System library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_SYSTEM_LIB) -# -# And sets: -# -# HAVE_BOOST_SYSTEM -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# Copyright (c) 2008 Michael Tindal -# Copyright (c) 2008 Daniel Casimiro -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 7 - -AC_DEFUN([AX_BOOST_SYSTEM], -[ - AC_ARG_WITH([boost-system], - AS_HELP_STRING([--with-boost-system@<:@=special-lib@:>@], - [use the System library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-system=boost_system-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_system_lib="" - else - want_boost="yes" - ax_boost_user_system_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - AC_REQUIRE([AC_CANONICAL_BUILD]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::System library is available, - ax_cv_boost_system, - [AC_LANG_PUSH([C++]) - CXXFLAGS_SAVE=$CXXFLAGS - - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include ]], - [[boost::system::system_category]]), - ax_cv_boost_system=yes, ax_cv_boost_system=no) - CXXFLAGS=$CXXFLAGS_SAVE - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_system" = "xyes"; then - AC_SUBST(BOOST_CPPFLAGS) - - AC_DEFINE(HAVE_BOOST_SYSTEM,,[define if the Boost::System library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - - LDFLAGS_SAVE=$LDFLAGS - if test "x$ax_boost_user_system_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_system*.{so,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_system.*\)\.so.*$;\1;' -e 's;^lib\(boost_system.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - if test "x$link_system" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_system*.{dll,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_system.*\)\.dll.*$;\1;' -e 's;^\(boost_system.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_system_lib boost_system-$ax_boost_user_system_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - - fi - if test "x$link_system" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/lang/c++/m4/m4_ax_boost_thread.m4 b/lang/c++/m4/m4_ax_boost_thread.m4 deleted file mode 100644 index bff7defa7ad..00000000000 --- a/lang/c++/m4/m4_ax_boost_thread.m4 +++ /dev/null @@ -1,146 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_thread.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_THREAD -# -# DESCRIPTION -# -# Test for Thread library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_THREAD_LIB) -# -# And sets: -# -# HAVE_BOOST_THREAD -# -# LICENSE -# -# Copyright (c) 2009 Thomas Porschberg -# Copyright (c) 2009 Michael Tindal -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 15 - -AC_DEFUN([AX_BOOST_THREAD], -[ - AC_ARG_WITH([boost-thread], - AS_HELP_STRING([--with-boost-thread@<:@=special-lib@:>@], - [use the Thread library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-thread=boost_thread-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_thread_lib="" - else - want_boost="yes" - ax_boost_user_thread_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - AC_REQUIRE([AC_CANONICAL_BUILD]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::Thread library is available, - ax_cv_boost_thread, - [AC_LANG_PUSH([C++]) - CXXFLAGS_SAVE=$CXXFLAGS - - if test "x$build_os" = "xsolaris" ; then - CXXFLAGS="-pthreads $CXXFLAGS" - elif test "x$build_os" = "xming32" ; then - CXXFLAGS="-mthreads $CXXFLAGS" - else - CXXFLAGS="-pthread $CXXFLAGS" - fi - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include ]], - [[boost::thread_group thrds; - return 0;]]), - ax_cv_boost_thread=yes, ax_cv_boost_thread=no) - CXXFLAGS=$CXXFLAGS_SAVE - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_thread" = "xyes"; then - if test "x$build_os" = "xsolaris" ; then - BOOST_CPPFLAGS="-pthreads $BOOST_CPPFLAGS" - elif test "x$build_os" = "xming32" ; then - BOOST_CPPFLAGS="-mthreads $BOOST_CPPFLAGS" - else - BOOST_CPPFLAGS="-pthread $BOOST_CPPFLAGS" - fi - - AC_SUBST(BOOST_CPPFLAGS) - - AC_DEFINE(HAVE_BOOST_THREAD,,[define if the Boost::Thread library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - - LDFLAGS_SAVE=$LDFLAGS - case "x$build_os" in - *bsd* ) - LDFLAGS="-pthread $LDFLAGS" - break; - ;; - esac - if test "x$ax_boost_user_thread_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_thread*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.a*$;\1;'`; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], - [link_thread="no"]) - done - if test "x$link_thread" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_thread*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], - [link_thread="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_thread_lib boost_thread-$ax_boost_user_thread_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], - [link_thread="no"]) - done - - fi - if test "x$link_thread" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - else - case "x$build_os" in - *bsd* ) - BOOST_LDFLAGS="-pthread $BOOST_LDFLAGS" - break; - ;; - esac - - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/lang/c++/parser/AvroLex.ll b/lang/c++/parser/AvroLex.ll deleted file mode 100644 index 6070e089a9e..00000000000 --- a/lang/c++/parser/AvroLex.ll +++ /dev/null @@ -1,203 +0,0 @@ -%{ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -// on some systems, won't find an EOF definition -#ifndef EOF -#define EOF (-1) -#endif - -#include "AvroYacc.hh" - -// this undef is a hack for my mac implementation -#undef yyFlexLexer -#include "Compiler.hh" - -#define YY_STACK_USED 1 - -using std::cin; -using std::cout; -using std::cerr; - -%} - -%option c++ -%option noyywrap - -%{ - -int yylex(int *val, void *ctx) -{ - avro::CompilerContext *c = static_cast(ctx); - int ret = c->lexer().yylex(); - if( ret > AVRO_LEX_OUTPUT_TEXT_BEGIN && ret < AVRO_LEX_OUTPUT_TEXT_END ) { - c->setText( c->lexer().YYText()) ; - } - return ret; -} - -%} - -%x READTYPE -%x STARTTYPE -%x STARTSCHEMA -%x READNAME -%x READFIELD -%x READFIELDS -%x READFIELDNAME -%x READSYMBOLS -%x READSYMBOL -%x READSIZE -%x INUNION -%x INOBJECT -%x READMETADATA -%x SKIPJSONSTRING -%x SKIPJSONARRAY -%x SKIPJSONOBJECT - -ws [ \t\r\n] -nonws [^ \t\r\n] -delim {ws}*:{ws}* -avrotext [a-zA-Z_][a-zA-Z0-9_.]* -startunion \[ -startobject \{ -integer [0-9]+ -anytext .* - -%% -int return AVRO_LEX_INT; -long return AVRO_LEX_LONG; -null return AVRO_LEX_NULL; -boolean return AVRO_LEX_BOOL; -float return AVRO_LEX_FLOAT; -double return AVRO_LEX_DOUBLE; -string return AVRO_LEX_STRING; -bytes return AVRO_LEX_BYTES; -record return AVRO_LEX_RECORD; -enum return AVRO_LEX_ENUM; -map return AVRO_LEX_MAP; -array return AVRO_LEX_ARRAY; -fixed return AVRO_LEX_FIXED; -{avrotext} return AVRO_LEX_NAMED_TYPE; -\" yy_pop_state(); - -{avrotext} return AVRO_LEX_NAME; -\" yy_pop_state(); - -{avrotext} return AVRO_LEX_SYMBOL; -\" yy_pop_state(); - -{avrotext} return AVRO_LEX_FIELD_NAME; -\" yy_pop_state(); - -\"type\"{delim} yy_push_state(STARTSCHEMA); -\"name\"{delim}\" yy_push_state(READFIELDNAME); -\} yy_pop_state(); return AVRO_LEX_FIELD_END; -, return yytext[0]; -\"{avrotext}\"+{delim} yy_push_state(READMETADATA); return AVRO_LEX_METADATA; -{ws} ; - -\{ yy_push_state(READFIELD); return AVRO_LEX_FIELD; -\] yy_pop_state(); return AVRO_LEX_FIELDS_END; -, return yytext[0]; -{ws} ; - -\" yy_push_state(READSYMBOL); -, return yytext[0]; -\] yy_pop_state(); return AVRO_LEX_SYMBOLS_END; -{ws} ; - -{integer} yy_pop_state(); return AVRO_LEX_SIZE; - -\" yy_push_state(READTYPE); return AVRO_LEX_SIMPLE_TYPE; -{startobject} yy_push_state(INOBJECT); return yytext[0]; -\] yy_pop_state(); return yytext[0]; -, return yytext[0]; -{ws} ; - -\" yy_pop_state(); -\\. ; -[^\"\\]+ ; - -\} yy_pop_state(); -\{ yy_push_state(SKIPJSONOBJECT); -\" yy_push_state(SKIPJSONSTRING); -[^\{\}\"]+ ; - -\] yy_pop_state(); -\[ yy_push_state(SKIPJSONARRAY); -\" yy_push_state(SKIPJSONSTRING); -[^\[\]\"]+ ; - -\" yy_pop_state(); yy_push_state(SKIPJSONSTRING); -\{ yy_pop_state(); yy_push_state(SKIPJSONOBJECT); -\[ yy_pop_state(); yy_push_state(SKIPJSONARRAY); -[^\"\{\[,\}]+ yy_pop_state(); - -\"type\"{delim} yy_push_state(STARTTYPE); return AVRO_LEX_TYPE; -\"name\"{delim}\" yy_push_state(READNAME); -\"size\"{delim} yy_push_state(READSIZE); -\"items\"{delim} yy_push_state(STARTSCHEMA); return AVRO_LEX_ITEMS; -\"values\"{delim} yy_push_state(STARTSCHEMA); return AVRO_LEX_VALUES; -\"fields\"{delim}\[ yy_push_state(READFIELDS); return AVRO_LEX_FIELDS; -\"symbols\"{delim}\[ yy_push_state(READSYMBOLS); return AVRO_LEX_SYMBOLS; -, return yytext[0]; -\} yy_pop_state(); return yytext[0]; -\"{avrotext}+\"{delim} yy_push_state(READMETADATA); return AVRO_LEX_METADATA; -{ws} ; - -\" yy_pop_state(); yy_push_state(READTYPE); -{startunion} yy_pop_state(); yy_push_state(INUNION); return yytext[0]; -{startobject} yy_pop_state(); yy_push_state(INOBJECT); return yytext[0]; - -\" yy_pop_state(); yy_push_state(READTYPE); return AVRO_LEX_SIMPLE_TYPE; -{startunion} yy_pop_state(); yy_push_state(INUNION); return yytext[0]; -{startobject} yy_pop_state(); yy_push_state(INOBJECT); return yytext[0]; - -{startobject} yy_push_state(INOBJECT); return yytext[0]; -{startunion} yy_push_state(INUNION); return yytext[0]; -\" yy_push_state(READTYPE); return AVRO_LEX_SIMPLE_TYPE; -{ws} ; -<> { -#if !YY_FLEX_SUBMINOR_VERSION || YY_FLEX_SUBMINOR_VERSION < 27 -// The versions of flex before 3.5.27 do not free their stack when done, so explcitly free it. -// Note that versions before did not actually define a subminor macro. - if (yy_start_stack) { - yy_flex_free(yy_start_stack); - yy_start_stack = 0; - } -#endif -#if YY_FLEX_SUBMINOR_VERSION > 35 -// At this time, 3.5.35 is the latest version. -#warning "Warning: untested version of flex" -#endif -#if YY_FLEX_SUBMINOR_VERSION >= 31 && YY_FLEX_SUBMINOR_VERSION < 34 -// The versions of flex starting 3.5.31 do not free yy_buffer_stack, so do so -// explicitly (first yy_delete_buffer must be called to free pointers stored on the stack, then it is -// safe to remove the stack). This was fixed in 3.4.34. - if(yy_buffer_stack) { - yy_delete_buffer(YY_CURRENT_BUFFER); - yyfree(yy_buffer_stack); - yy_buffer_stack = 0; - } -#endif - yyterminate(); - } - -%% - diff --git a/lang/c++/parser/AvroYacc.yy b/lang/c++/parser/AvroYacc.yy deleted file mode 100644 index 404d39585e3..00000000000 --- a/lang/c++/parser/AvroYacc.yy +++ /dev/null @@ -1,200 +0,0 @@ -%{ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include -#include "Compiler.hh" -#include "Exception.hh" - -#define YYLEX_PARAM ctx -#define YYPARSE_PARAM ctx - -void yyerror(const char *str) -{ - throw avro::Exception(boost::format("Parser error: %1%") % str); -} - -extern void *lexer; -extern int yylex(int *, void *); - -avro::CompilerContext &context(void *ctx) { - return *static_cast(ctx); -}; - -%} - -%pure-parser -%error-verbose - -%token AVRO_LEX_INT AVRO_LEX_LONG -%token AVRO_LEX_FLOAT AVRO_LEX_DOUBLE -%token AVRO_LEX_BOOL AVRO_LEX_NULL -%token AVRO_LEX_BYTES AVRO_LEX_STRING -%token AVRO_LEX_RECORD AVRO_LEX_ENUM AVRO_LEX_ARRAY AVRO_LEX_MAP AVRO_LEX_UNION AVRO_LEX_FIXED - -%token AVRO_LEX_METADATA - -%token AVRO_LEX_SYMBOLS AVRO_LEX_SYMBOLS_END -%token AVRO_LEX_FIELDS AVRO_LEX_FIELDS_END AVRO_LEX_FIELD AVRO_LEX_FIELD_END - -%token AVRO_LEX_TYPE AVRO_LEX_ITEMS AVRO_LEX_VALUES - -// Tokens that output text: -%token AVRO_LEX_OUTPUT_TEXT_BEGIN -%token AVRO_LEX_NAME -%token AVRO_LEX_NAMED_TYPE -%token AVRO_LEX_FIELD_NAME -%token AVRO_LEX_SYMBOL -%token AVRO_LEX_SIZE -%token AVRO_LEX_OUTPUT_TEXT_END - -%token AVRO_LEX_SIMPLE_TYPE - -%% - -avroschema: - simpleprimitive | object | union_t - ; - -primitive: - AVRO_LEX_INT { context(ctx).addType(avro::AVRO_INT); } - | - AVRO_LEX_LONG { context(ctx).addType(avro::AVRO_LONG); } - | - AVRO_LEX_FLOAT { context(ctx).addType(avro::AVRO_FLOAT); } - | - AVRO_LEX_DOUBLE { context(ctx).addType(avro::AVRO_DOUBLE); } - | - AVRO_LEX_BOOL { context(ctx).addType(avro::AVRO_BOOL); } - | - AVRO_LEX_NULL { context(ctx).addType(avro::AVRO_NULL); } - | - AVRO_LEX_BYTES { context(ctx).addType(avro::AVRO_BYTES); } - | - AVRO_LEX_STRING { context(ctx).addType(avro::AVRO_STRING); } - | - AVRO_LEX_NAMED_TYPE { context(ctx).addNamedType(); } - ; - -simpleprimitive: - AVRO_LEX_SIMPLE_TYPE { context(ctx).startType(); } primitive { context(ctx).stopType(); } - ; - -primitive_t: - AVRO_LEX_TYPE primitive - ; - -array_t: - AVRO_LEX_TYPE AVRO_LEX_ARRAY { context(ctx).addType(avro::AVRO_ARRAY); } - ; - -enum_t: - AVRO_LEX_TYPE AVRO_LEX_ENUM { context(ctx).addType(avro::AVRO_ENUM); } - ; - -fixed_t: - AVRO_LEX_TYPE AVRO_LEX_FIXED { context(ctx).addType(avro::AVRO_FIXED); } - ; - -map_t: - AVRO_LEX_TYPE AVRO_LEX_MAP { context(ctx).addType(avro::AVRO_MAP); } - ; - -record_t: - AVRO_LEX_TYPE AVRO_LEX_RECORD { context(ctx).addType(avro::AVRO_RECORD); } - ; - -type_attribute: - array_t | enum_t | fixed_t | map_t | record_t | primitive_t - ; - -union_t: - '[' { context(ctx).startType(); context(ctx).addType(avro::AVRO_UNION); context(ctx).setTypesAttribute(); } - unionlist - ']' { context(ctx).stopType(); } - ; - -object: - '{' { context(ctx).startType(); } - attributelist - '}' { context(ctx).stopType(); } - ; - -name_attribute: - AVRO_LEX_NAME { context(ctx).setNameAttribute(); } - ; - -size_attribute: - AVRO_LEX_SIZE { context(ctx).setSizeAttribute(); } - ; - -values_attribute: - AVRO_LEX_VALUES { context(ctx).setValuesAttribute(); } avroschema - ; - -fields_attribute: - AVRO_LEX_FIELDS { context(ctx).setFieldsAttribute(); } fieldslist AVRO_LEX_FIELDS_END - ; - -items_attribute: - AVRO_LEX_ITEMS { context(ctx).setItemsAttribute(); } avroschema - ; - -symbols_attribute: - AVRO_LEX_SYMBOLS symbollist AVRO_LEX_SYMBOLS_END - ; - -attribute: - type_attribute | name_attribute | fields_attribute | items_attribute | size_attribute | values_attribute | symbols_attribute | AVRO_LEX_METADATA - ; - -attributelist: - attribute | attributelist ',' attribute - ; - -symbol: - AVRO_LEX_SYMBOL { context(ctx).setSymbolsAttribute(); } - ; - -symbollist: - symbol | symbollist ',' symbol - ; - -fieldsetting: - fieldname | avroschema | AVRO_LEX_METADATA - ; - -fieldsettinglist: - fieldsetting | fieldsettinglist ',' fieldsetting - ; - -fields: - AVRO_LEX_FIELD fieldsettinglist AVRO_LEX_FIELD_END - ; - -fieldname: - AVRO_LEX_FIELD_NAME { context(ctx).textContainsFieldName(); } - ; - -fieldslist: - fields | fieldslist ',' fields - ; - -unionlist: - avroschema | unionlist ',' avroschema - ; diff --git a/lang/c++/test/AvrogencppTestReservedWords.cc b/lang/c++/test/AvrogencppTestReservedWords.cc index 7d305fb44d9..e3b9838e5b0 100644 --- a/lang/c++/test/AvrogencppTestReservedWords.cc +++ b/lang/c++/test/AvrogencppTestReservedWords.cc @@ -16,10 +16,11 @@ * limitations under the License. */ #include "cpp_reserved_words.hh" +#include "cpp_reserved_words_union_typedef.hh" #include "Compiler.hh" -#include +#include #ifdef min #undef min diff --git a/lang/c++/test/AvrogencppTests.cc b/lang/c++/test/AvrogencppTests.cc index 2130f818b55..38a70d1496f 100644 --- a/lang/c++/test/AvrogencppTests.cc +++ b/lang/c++/test/AvrogencppTests.cc @@ -17,13 +17,17 @@ */ #include "Compiler.hh" +#include "big_union.hh" #include "bigrecord.hh" #include "bigrecord_r.hh" #include "tweet.hh" #include "union_array_union.hh" +#include "union_empty_record.hh" #include "union_map_union.hh" +#include "union_redundant_types.hh" -#include +#include +#include #ifdef min #undef min @@ -131,6 +135,14 @@ void checkDefaultValues(const testgen_r::RootRecord &r) { BOOST_CHECK_EQUAL(r.byteswithDefaultValue.get_bytes()[1], 0xaa); } +// enable use of BOOST_CHECK_EQUAL +template<> +struct boost::test_tools::tt_detail::print_log_value { + void operator()(std::ostream &stream, const big_union::RootRecord::big_union_t::Branch &branch) const { + stream << "big_union_t::Branch{" << static_cast(branch) << "}"; + } +}; + void testEncoding() { ValidSchema s; ifstream ifs("jsonschemas/bigrecord"); @@ -219,16 +231,16 @@ void testNamespace() { twPoint.set_AvroPoint(point); } -void setRecord(uau::r1 &r) { +void setRecord(uau::r1 &) { } -void check(const uau::r1 &r1, const uau::r1 &r2) { +void check(const uau::r1 &, const uau::r1 &) { } -void setRecord(umu::r1 &r) { +void setRecord(umu::r1 &) { } -void check(const umu::r1 &r1, const umu::r1 &r2) { +void check(const umu::r1 &, const umu::r1 &) { } template @@ -267,13 +279,247 @@ void testEncoding2() { check(t2, t1); } -boost::unit_test::test_suite * -init_unit_test_suite(int /*argc*/, char * /*argv*/[]) { +void testEmptyRecord() { + uer::StackCalculator calc; + uer::StackCalculator::stack_item_t item; + item.set_int(3); + calc.stack.push_back(item); + item.set_Dup(uer::Dup()); + calc.stack.push_back(item); + item.set_Add(uer::Add()); + calc.stack.push_back(item); + + ValidSchema s; + ifstream ifs("jsonschemas/union_empty_record"); + compileJsonSchema(ifs, s); + + unique_ptr os = memoryOutputStream(); + EncoderPtr e = validatingEncoder(s, binaryEncoder()); + e->init(*os); + avro::encode(*e, calc); + e->flush(); + + DecoderPtr d = validatingDecoder(s, binaryDecoder()); + unique_ptr is = memoryInputStream(*os); + d->init(*is); + uer::StackCalculator calc2; + avro::decode(*d, calc2); + + BOOST_CHECK_EQUAL(calc.stack.size(), calc2.stack.size()); + BOOST_CHECK_EQUAL(calc2.stack[0].idx(), 0); + BOOST_CHECK_EQUAL(calc2.stack[1].idx(), 1); + BOOST_CHECK_EQUAL(calc2.stack[2].idx(), 2); +} + +void testUnionMethods() { + ValidSchema schema; + ifstream ifs_w("jsonschemas/bigrecord"); + compileJsonSchema(ifs_w, schema); + + testgen::RootRecord record; + // initialize the map and set values with getter + record.myunion.set_map({}); + record.myunion.get_map()["zero"] = 0; + record.myunion.get_map()["one"] = 1; + + std::vector bytes{1, 2, 3, 4}; + record.anotherunion.set_bytes(std::move(bytes)); + // after move assignment the local variable should be empty + BOOST_CHECK(bytes.empty()); + + unique_ptr out_stream = memoryOutputStream(); + EncoderPtr encoder = validatingEncoder(schema, binaryEncoder()); + encoder->init(*out_stream); + avro::encode(*encoder, record); + encoder->flush(); + + DecoderPtr decoder = validatingDecoder(schema, binaryDecoder()); + unique_ptr is = memoryInputStream(*out_stream); + decoder->init(*is); + testgen::RootRecord decoded_record; + avro::decode(*decoder, decoded_record); + + // check that a reference can be obtained from a union + BOOST_CHECK(decoded_record.myunion.branch() == testgen::RootRecord::myunion_t::Branch::map); + const std::map &read_map = decoded_record.myunion.get_map(); + BOOST_CHECK_EQUAL(read_map.size(), 2); + BOOST_CHECK_EQUAL(read_map.at("zero"), 0); + BOOST_CHECK_EQUAL(read_map.at("one"), 1); + + BOOST_CHECK(decoded_record.anotherunion.branch() == testgen::RootRecord::anotherunion_t::Branch::bytes); + const std::vector read_bytes = decoded_record.anotherunion.get_bytes(); + const std::vector expected_bytes{1, 2, 3, 4}; + BOOST_CHECK_EQUAL_COLLECTIONS(read_bytes.begin(), read_bytes.end(), expected_bytes.begin(), expected_bytes.end()); +} + +void testUnionBranchEnum() { + big_union::RootRecord record; + + using Branch = big_union::RootRecord::big_union_t::Branch; + + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::null); + record.big_union.set_null(); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::null); + + record.big_union.set_bool(false); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::bool_); + + record.big_union.set_int(123); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::int_); + + record.big_union.set_long(456); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::long_); + + record.big_union.set_float(555.555f); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::float_); + + record.big_union.set_double(777.777); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::double_); + + record.big_union.set_MD5({}); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::MD5); + + record.big_union.set_string("test"); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::string); + + record.big_union.set_Vec2({}); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::Vec2); + + record.big_union.set_Vec3({}); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::Vec3); + + record.big_union.set_Suit(big_union::Suit::CLUBS); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::Suit); + + record.big_union.set_array({}); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::array); + + record.big_union.set_map({}); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::map); + + record.big_union.set_int_({}); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::int__2); + + record.big_union.set_int__({}); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::int__); + + record.big_union.set_Int({}); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::Int); + + record.big_union.set__Int({}); + BOOST_CHECK_EQUAL(record.big_union.branch(), Branch::_Int); +} + +// enable use of BOOST_CHECK_EQUAL +template<> +struct boost::test_tools::tt_detail::print_log_value { + void operator()(std::ostream &stream, const std::type_info &type_info) const { + stream << "std::type_info{.name=" << type_info.name() << "}"; + } +}; + +void testNoRedundantUnionTypes() { + redundant_types::RedundantUnionSchema record; + // ensure only one class is generated for same union + BOOST_CHECK_EQUAL(typeid(record.null_string_1), typeid(record.null_string_2)); + BOOST_CHECK_EQUAL(typeid(record.string_null_1), typeid(record.string_null_2)); + BOOST_CHECK_EQUAL(typeid(record.null_Empty_1), typeid(record.null_Empty_2)); + BOOST_CHECK_EQUAL(typeid(record.null_namespace_record_1), typeid(record.null_namespace_record_2)); + BOOST_CHECK_EQUAL(typeid(record.null_int_map_1), typeid(record.null_int_map_2)); + + // different union types should have different class + BOOST_CHECK_NE(typeid(record.null_string_1), typeid(record.string_null_1)); + BOOST_CHECK_NE(typeid(record.null_string_1), typeid(record.null_string_int)); + BOOST_CHECK_NE(typeid(record.null_fixed_8), typeid(record.null_fixed_16)); + BOOST_CHECK_NE(typeid(record.null_int_map_1), typeid(record.null_long_map)); +} + +void testNoRedundantUnionTypesEncodeDecode() { + redundant_types::RedundantUnionSchema input_record; + input_record.null_string_1.set_string("null_string_1"); + input_record.null_string_2.set_string("null_string_2"); + input_record.string_null_1.set_string("string_null_1"); + input_record.string_null_2.set_string("string_null_2"); + input_record.null_string_int.set_string("null_string_int"); + input_record.null_Empty_1.set_Empty({}); + input_record.null_Empty_2.set_Empty({}); + input_record.null_namespace_record_1.set_Record({}); + input_record.null_namespace_record_2.set_Record({}); + input_record.null_fixed_8.set_fixed_8({8}); + input_record.null_fixed_16.set_fixed_16({16}); + input_record.fixed_8_fixed_16.set_fixed_16({16}); + input_record.null_int_map_1.set_map({{"null_int_map_1", 1}}); + input_record.null_int_map_2.set_map({{"null_int_map_2", 1}}); + input_record.null_long_map.set_map({{"null_long_map", 1}}); + + ValidSchema s; + ifstream ifs("jsonschemas/union_redundant_types"); + compileJsonSchema(ifs, s); + + unique_ptr os = memoryOutputStream(); + EncoderPtr e = validatingEncoder(s, binaryEncoder()); + e->init(*os); + avro::encode(*e, input_record); + e->flush(); + + DecoderPtr d = validatingDecoder(s, binaryDecoder()); + unique_ptr is = memoryInputStream(*os); + d->init(*is); + redundant_types::RedundantUnionSchema result_record; + avro::decode(*d, result_record); + + BOOST_CHECK_EQUAL(result_record.null_string_1.get_string(), "null_string_1"); + BOOST_CHECK_EQUAL(result_record.null_string_2.get_string(), "null_string_2"); + BOOST_CHECK_EQUAL(result_record.string_null_1.get_string(), "string_null_1"); + BOOST_CHECK_EQUAL(result_record.string_null_2.get_string(), "string_null_2"); + BOOST_CHECK_EQUAL(result_record.null_string_int.get_string(), "null_string_int"); + BOOST_CHECK(!result_record.null_Empty_1.is_null()); + BOOST_CHECK(!result_record.null_Empty_2.is_null()); + BOOST_CHECK(!result_record.null_namespace_record_1.is_null()); + BOOST_CHECK(!result_record.null_namespace_record_2.is_null()); + { + const auto actual = result_record.null_fixed_8.get_fixed_8(); + const std::array expected{8}; + BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(), actual.end(), expected.begin(), expected.end()); + } + { + const auto actual = result_record.null_fixed_16.get_fixed_16(); + const std::array expected{16}; + BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(), actual.end(), expected.begin(), expected.end()); + } + { + const auto actual = result_record.fixed_8_fixed_16.get_fixed_16(); + const std::array expected{16}; + BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(), actual.end(), expected.begin(), expected.end()); + } + { + const auto actual = result_record.null_int_map_1.get_map(); + BOOST_CHECK_EQUAL(actual.size(), 1); + BOOST_CHECK_EQUAL(actual.at("null_int_map_1"), 1); + } + { + const auto actual = result_record.null_int_map_2.get_map(); + BOOST_CHECK_EQUAL(actual.size(), 1); + BOOST_CHECK_EQUAL(actual.at("null_int_map_2"), 1); + } + { + const auto actual = result_record.null_long_map.get_map(); + BOOST_CHECK_EQUAL(actual.size(), 1); + BOOST_CHECK_EQUAL(actual.at("null_long_map"), 1); + } +} + +boost::unit_test::test_suite *init_unit_test_suite(int /*argc*/, char * /*argv*/[]) { auto *ts = BOOST_TEST_SUITE("Code generator tests"); ts->add(BOOST_TEST_CASE(testEncoding)); ts->add(BOOST_TEST_CASE(testResolution)); ts->add(BOOST_TEST_CASE(testEncoding2)); ts->add(BOOST_TEST_CASE(testEncoding2)); ts->add(BOOST_TEST_CASE(testNamespace)); + ts->add(BOOST_TEST_CASE(testEmptyRecord)); + ts->add(BOOST_TEST_CASE(testUnionMethods)); + ts->add(BOOST_TEST_CASE(testUnionBranchEnum)); + ts->add(BOOST_TEST_CASE(testNoRedundantUnionTypes)); + ts->add(BOOST_TEST_CASE(testNoRedundantUnionTypesEncodeDecode)); return ts; } diff --git a/lang/c++/test/CodecTests.cc b/lang/c++/test/CodecTests.cc index a99cdd67498..59aa023dbd1 100644 --- a/lang/c++/test/CodecTests.cc +++ b/lang/c++/test/CodecTests.cc @@ -25,7 +25,7 @@ #include "Specific.hh" #include "ValidSchema.hh" -#include +#include #include #include #include @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include @@ -68,7 +68,7 @@ static const unsigned int count = 10; * promote data type, convert from union to plain data type and vice versa, * insert or remove fields in records or reorder fields in a record. * - * To test Json encoder and decoder, we use the same technqiue with only + * To test Json encoder and decoder, we use the same technique with only * one difference - we use JsonEncoder and JsonDecoder. * * We also use the same infrastructure to test GenericReader and GenericWriter. @@ -160,7 +160,7 @@ static string randomString(size_t len) { if (c == '\0') { c = '\x7f'; } - result.push_back(c); + result.push_back(static_cast(c)); } return result; } @@ -169,7 +169,7 @@ static vector randomBytes(size_t len) { vector result; result.reserve(len); for (size_t i = 0; i < len; ++i) { - result.push_back(rnd()); + result.push_back(static_cast(rnd())); } return result; } @@ -335,7 +335,7 @@ struct StackElement { }; } // namespace -static vector::const_iterator skipCalls(Scanner &sc, Decoder &d, +static vector::const_iterator skipCalls(Scanner &sc, Decoder &, vector::const_iterator it, bool isArray) { char end = isArray ? ']' : '}'; int level = 0; @@ -364,7 +364,7 @@ static vector::const_iterator skipCalls(Scanner &sc, Decoder &d, case 'K': case 'b': case 'f': - case 'e': ++it; // Fall through. + case 'e': ++it; [[fallthrough]]; case 'c': case 'U': sc.extractInt(); @@ -525,7 +525,7 @@ ValidSchema makeValidSchema(const char *schema) { istringstream iss(schema); ValidSchema vs; compileJsonSchema(iss, vs); - return ValidSchema(vs); + return vs; } void testEncoder(const EncoderPtr &e, const char *writerCalls, @@ -594,7 +594,6 @@ struct TestData4 { const char *readerCalls; const char *readerValues[100]; unsigned int depth; - size_t recordCount; }; void appendSentinel(OutputStream &os) { @@ -838,7 +837,7 @@ void testGenericResolving(const TestData3 &td) { GenericReader gr(wvs, rvs, d1); GenericDatum datum; gr.read(datum); - d1->drain(); + gr.drain(); assertSentinel(*in1); EncoderPtr e2 = CodecFactory::newEncoder(rvs); @@ -963,6 +962,11 @@ static const TestData data[] = { {R"({"type":"map", "values": "boolean"})", "{c1sK5Bc2sK5BsK5B}", 2}, + // Record with no fields + {"{\"type\":\"record\",\"name\":\"empty\",\"fields\":[]}", + "", 1}, + + // Single-field records {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" "{\"name\":\"f\", \"type\":\"boolean\"}]}", "B", 1}, @@ -1002,6 +1006,16 @@ static const TestData data[] = { "{\"name\":\"f7\", \"type\":\"bytes\"}]}", "NBILFDS10b25", 1}, // record of records + {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" + "{\"name\":\"f1\",\"type\":\"boolean\"}," + "{\"name\":\"f2\", \"type\":{\"type\":\"record\"," + "\"name\":\"inner\",\"fields\":[]}}]}", + "B", 1}, + {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" + "{\"name\":\"f1\",\"type\":\"boolean\"}," + "{\"name\":\"f2\", \"type\":{\"type\":\"array\"," + "\"items\":\"r\"}}]}", + "B[]", 1}, {"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" "{\"name\":\"f1\", \"type\":{\"type\":\"record\", " "\"name\":\"inner\", \"fields\":[" @@ -1264,249 +1278,558 @@ static const TestData3 data3[] = { {R"(["boolean", "int"])", "U1I", R"(["boolean", "long"])", "U1L", 1}, {R"(["boolean", "int"])", "U1I", R"(["long", "boolean"])", "U0L", 1}, + + // Aliases + {R"({"type": "record", "name": "r", "fields": [ + {"name": "f0", "type": "int"}, + {"name": "f1", "type": "boolean"}, + {"name": "f2", "type": "double"}]})", + "IBD", + R"({"type":"record", "name":"s", "aliases":["r"], "fields":[ + {"name":"g0", "type":"int", "aliases":["f0"]}, + {"name":"g1", "type":"boolean", "aliases":["f1"]}, + {"name":"f2", "type":"double", "aliases":["g2"]}]})", + "IBD", + 1}, + {R"({"type": "record", "name": "r", "namespace": "n", "fields": [ + {"name": "f0", "type": "int"}]})", + "I", + R"({"type": "record", "name": "s", "namespace": "n2", "aliases": ["t", "n.r"], "fields":[ + {"name": "f0", "type": "int"}]})", + "I", + 1}, + {R"({"type": "enum", "name": "e", "symbols": ["a", "b"]})", + "e1", + R"({"type": "enum", "name": "f", "aliases": ["e"], "symbols":["a", "b", "c"]})", + "e1", + 1}, + {R"({"type": "enum", "name": "e", "namespace": "n", "symbols": ["a", "b"]})", + "e1", + R"({"type": "enum", "name": "f", "namespace": "n2", "aliases": ["g", "n.e"], "symbols": ["a", "b"]})", + "e1", + 1}, + {R"({"type": "fixed", "name": "f", "size": 8})", + "f8", + R"({"type": "fixed", "name": "g", "aliases": ["f"], "size": 8})", + "f8", + 1}, + {R"({"type": "fixed", "name": "f", "namespace": "n", "size": 8})", + "f8", + R"({"type": "fixed", "name": "g", "namespace": "n2", "aliases": ["h", "n.f"], "size": 8})", + "f8", + 1}, + {R"({"type": "record", "name": "r1", "fields": [ + {"name": "f1", "type": ["null", {"type": "record", "name": "r2", "fields": [{"name": "f11", "type": "string"}]}]}, + {"name": "f2", "type": {"type": "array", "items": "r2"}} + ]})", + "U0N[c3sS1sS2sS3]", + R"({"type": "record", "name": "r1", "fields": [ + {"name": "f1", "type": [ + "null", + {"type": "record", "name": "r2", "fields": [{"name": "f11", "type": "string"}]}, + {"type": "record", "name": "r3", "fields": [ + {"name": "g11", "type": {"type": "array", "items": {"type": "record", "name": "r31", "fields": [{"name": "g111", "type": "double"}]}}} + ]} + ]}, + {"name": "f2", "type": {"type": "array", "items": "r2"}}, + {"name": "f3", "type": {"type": "array", "items": "r3"}, "default": []} + ]})", + "U0N[c3sS1sS2sS3][]", + 1}, + { + R"({"name": "Project", "type": "record", "fields": [ + { "name": "_types", "type": [ + "null", + { "name": "Record1", "type": "record", "fields": [{ "name": "Record1_field1", "type": "string" }]} + ]}, + { "name": "field1", "type": { "type": "array", "items": "Record1" } } + ]})", + "U0N[c3sS1sS2sS3]", + R"({"name": "Project", "type": "record", "fields": [ + { "name": "_types", "type": [ + "null", + { "name": "Record1", "type": "record", "fields": [{ "name": "Record1_field1", "type": "string" }]}, + { "name": "Record3", "type": "record", "fields": [ + { "name": "Record3_field1", "type": { "type": "array", "items": { "name": "Record2", "type": "record", + "fields":[{ "name": "Record2_field1", "type": "double" }]} + }} + ]} + ]}, + { "name": "field1", "type": { "type": "array", "items": "Record1" } }, + { "name": "field2", "type": { "type": "array", "items": "Record3" }, "default": [] } + ]})", + "U0N[c3sS1sS2sS3][]", + 1}, + { + R"({"name": "Project", "type": "record", "fields": [ + { "name": "_types", "type": [ + "null", + { "name": "Record1", "type": "record", "fields": [{ "name": "Record1_field1", "type": "string" }]}, + { "name": "Record3", "type": "record", "fields": [ + { "name": "Record3_field1", "type": { "type": "array", "items": { "name": "Record2", "type": "record", + "fields":[{ "name": "Record2_field1", "type": "double" }]} + }} + ]} + ]}, + { "name": "field1", "type": { "type": "array", "items": "Record1" } }, + { "name": "field2", "type": { "type": "array", "items": "Record3" }, "default": [] } + ]})", + "U0N[c3sS1sS2sS3][]", + R"({"name": "Project", "type": "record", "fields": [ + { "name": "_types", "type": [ + "null", + { "name": "Record1", "type": "record", "fields": [{ "name": "Record1_field1", "type": "string" }]} + ]}, + { "name": "field1", "type": { "type": "array", "items": "Record1" } } + ]})", + "U0N[c3sS1sS2sS3]", + 1}, }; static const TestData4 data4[] = { // Projection - {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"string\"}," - "{\"name\":\"f2\", \"type\":\"string\"}," - "{\"name\":\"f3\", \"type\":\"int\"}]}", - "S10S10IS10S10I", - {"s1", "s2", "100", "t1", "t2", "200", nullptr}, - "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"string\" }," - "{\"name\":\"f2\", \"type\":\"string\"}]}", - "RS10S10RS10S10", - {"s1", "s2", "t1", "t2", nullptr}, - 1, - 2}, + { + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f1", "type": "string"}, + {"name": "f2", "type": "string"}, + {"name": "f3", "type": "int"} + ] + })", + "S10S10IS10S10I", + {"s1", "s2", "100", "t1", "t2", "200", nullptr}, + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f1", "type": "string"}, + {"name": "f2", "type": "string"} + ] + })", + "RS10S10RS10S10", + {"s1", "s2", "t1", "t2", nullptr}, + 1}, // Reordered fields - {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"int\"}," - "{\"name\":\"f2\", \"type\":\"string\"}]}", - "IS10", - {"10", "hello", nullptr}, - "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f2\", \"type\":\"string\" }," - "{\"name\":\"f1\", \"type\":\"long\"}]}", - "RLS10", - {"10", "hello", nullptr}, - 1, - 1}, + { + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f1", "type": "int"}, + {"name": "f2", "type": "string"} + ] + })", + "IS10", + {"10", "hello", nullptr}, + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f2", "type": "string" }, + {"name": "f1", "type": "long"} + ] + })", + "RLS10", + {"10", "hello", nullptr}, + 1}, // Default values - {R"({"type":"record","name":"r","fields":[]})", "", {nullptr}, "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f\", \"type\":\"int\", \"default\": 100}]}", - "RI", - {"100", nullptr}, - 1, - 1}, - - {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f2\", \"type\":\"int\"}]}", + { + R"({"type": "record", "name": "r", "fields": []})", + "", + {nullptr}, + R"({ + "type": "record", + "name": "r", + "fields": [{"name": "f", "type": "int", "default": 100}] + })", + "RI", + {"100", nullptr}, + 1}, + + {R"({"type": "record", "name": "r", "fields": [{"name": "f2", "type": "int"}]})", "I", {"10", nullptr}, - "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"int\", \"default\": 101}," - "{\"name\":\"f2\", \"type\":\"int\"}]}", + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f1", "type": "int", "default": 101}, + {"name": "f2", "type": "int"} + ] + })", "RII", {"10", "101", nullptr}, - 1, 1}, - {"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner\",\"fields\":[" - "{\"name\":\"f2\", \"type\":\"int\"}]}}, " - "{\"name\": \"g2\", \"type\": \"long\"}]}", - "IL", - {"10", "11", nullptr}, - "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"int\", \"default\": 101}," - "{\"name\":\"f2\", \"type\":\"int\"}]}}, " - "{\"name\": \"g2\", \"type\": \"long\"}]}}", - "RRIIL", - {"10", "101", "11", nullptr}, - 1, - 1}, + { + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner", + "fields": [{"name": "f2", "type": "int"}] + } + }, + {"name": "g2", "type": "long"} + ] + })", + "IL", + {"10", "11", nullptr}, + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner", + "fields": [ + { + "name": "f1", + "type": "int", + "default": 101 + }, + {"name": "f2", "type": "int"} + ] + } + }, + {"name": "g2", "type": "long"} + ] + })", + "RRIIL", + {"10", "101", "11", nullptr}, + 1}, // Default value for a record. - {"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner1\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"long\" }," - "{\"name\":\"f2\", \"type\":\"int\"}] } }, " - "{\"name\": \"g2\", \"type\": \"long\"}]}", - "LIL", - {"10", "12", "13", nullptr}, - "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner1\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"long\" }," - "{\"name\":\"f2\", \"type\":\"int\"}] } }, " - "{\"name\": \"g2\", \"type\": \"long\"}," - "{\"name\": \"g3\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner2\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"long\" }," - "{\"name\":\"f2\", \"type\":\"int\"}] }, " - "\"default\": { \"f1\": 15, \"f2\": 101 } }] } ", - "RRLILRLI", - {"10", "12", "13", "15", "101", nullptr}, - 1, - 1}, + { + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner1", + "fields": [ + {"name": "f1", "type": "long"}, + {"name": "f2", "type": "int"} + ] + } + }, + {"name": "g2", "type": "long"} + ] + })", + "LIL", + {"10", "12", "13", nullptr}, + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner1", + "fields": [ + {"name": "f1", "type": "long"}, + {"name": "f2", "type": "int"} + ] + } + }, + {"name": "g2", "type": "long"}, + { + "name": "g3", + "type": { + "type": "record", + "name": "inner2", + "fields": [ + {"name": "f1", "type": "long"}, + {"name": "f2", "type": "int"} + ] + }, + "default": {"f1": 15, "f2": 101} + } + ] + })", + "RRLILRLI", + {"10", "12", "13", "15", "101", nullptr}, + 1}, - {"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner1\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"long\" }," - "{\"name\":\"f2\", \"type\":\"int\"}] } }, " - "{\"name\": \"g2\", \"type\": \"long\"}]}", - "LIL", - {"10", "12", "13", nullptr}, - "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner1\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"long\" }," - "{\"name\":\"f2\", \"type\":\"int\"}] } }, " - "{\"name\": \"g2\", \"type\": \"long\"}," - "{\"name\": \"g3\", " - "\"type\":\"inner1\", " - "\"default\": { \"f1\": 15, \"f2\": 101 } }] } ", - "RRLILRLI", - {"10", "12", "13", "15", "101", nullptr}, - 1, - 1}, + { + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner1", + "fields": [ + {"name": "f1", "type": "long"}, + {"name": "f2", "type": "int"} + ] + } + }, + {"name": "g2", "type": "long"} + ] + })", + "LIL", + {"10", "12", "13", nullptr}, + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner1", + "fields": [ + {"name": "f1", "type": "long"}, + {"name": "f2", "type": "int"} + ] + } + }, + {"name": "g2", "type": "long"}, + { + "name": "g3", + "type": "inner1", + "default": {"f1": 15, "f2": 101} + } + ] + })", + "RRLILRLI", + {"10", "12", "13", "15", "101", nullptr}, + 1}, - {R"({"type":"record","name":"r","fields":[]})", "", {nullptr}, "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f\", \"type\":{ \"type\": \"array\", \"items\": \"int\" }," - "\"default\": [100]}]}", - "[c1sI]", - {"100", nullptr}, - 1, - 1}, + // TODO mkmkme HERE + { + R"({ + "type": "record", + "name": "r", + "fields": [] + })", + "", + {nullptr}, + R"({ + "type": "record", + "name": "r", + "fields": [ + { + "name": "f", + "type": {"type": "array", "items": "int"}, + "default": [100] + } + ] + })", + "[c1sI]", + {"100", nullptr}, + 1}, - {"{ \"type\": \"array\", \"items\": {\"type\":\"record\"," - "\"name\":\"r\",\"fields\":[" - "{\"name\":\"f0\", \"type\": \"int\"}]} }", - "[c1sI]", - {"99", nullptr}, - "{ \"type\": \"array\", \"items\": {\"type\":\"record\"," - "\"name\":\"r\",\"fields\":[" - "{\"name\":\"f\", \"type\":\"int\", \"default\": 100}]} }", - "[Rc1sI]", - {"100", nullptr}, - 1, - 1}, + { + R"({ + "type": "array", + "items": { + "type": "record", + "name": "r", + "fields": [{"name": "f0", "type": "int"}] + } + })", + "[c1sI]", + {"99", nullptr}, + R"({ + "type": "array", + "items": { + "type": "record", + "name": "r", + "fields": [{"name": "f", "type": "int", "default": 100}] + } + })", + "[Rc1sI]", + {"100", nullptr}, + 1}, // Record of array of record with deleted field as last field - {"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\"," - "\"type\":{\"type\":\"array\",\"items\":{" - "\"name\":\"item\",\"type\":\"record\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"int\"}," - "{\"name\":\"f2\", \"type\": \"long\", \"default\": 0}]}}}]}", - "[c1sIL]", - {"10", "11", nullptr}, - "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\"," - "\"type\":{\"type\":\"array\",\"items\":{" - "\"name\":\"item\",\"type\":\"record\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"int\"}]}}}]}", - "R[c1sI]", - {"10", nullptr}, - 2, - 1}, + { + R"({ + "type": "record", + "name": "outer", + "fields":[ + { + "name": "g1", + "type": { + "type": "array", + "items": { + "name": "item", + "type": "record", + "fields": [ + {"name": "f1", "type": "int"}, + {"name": "f2", "type": "long", "default": 0} + ] + } + } + } + ] + })", + "[c1sIL]", + {"10", "11", nullptr}, + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "array", + "items": { + "name": "item", + "type": "record", + "fields": [{"name": "f1", "type": "int"}] + } + } + } + ] + })", + "R[c1sI]", + {"10", nullptr}, + 2}, // Enum resolution - {R"({"type":"enum","name":"e","symbols":["x","y","z"]})", - "e2", - {nullptr}, - R"({"type":"enum","name":"e","symbols":[ "y", "z" ]})", - "e1", - {nullptr}, - 1, - 1}, - - {R"({"type":"enum","name":"e","symbols":[ "x", "y" ]})", + { + R"({"type":"enum","name":"e","symbols":["x","y","z"]})", + "e2", + {nullptr}, + R"({"type": "enum", "name": "e", "symbols": ["y", "z"]})", + "e1", + {nullptr}, + 1}, + + {R"({"type": "enum", "name": "e", "symbols": ["x", "y"]})", "e1", {nullptr}, - R"({"type":"enum","name":"e","symbols":[ "y", "z" ]})", + R"({"type": "enum", "name": "e", "symbols": ["y", "z"]})", "e0", {nullptr}, - 1, 1}, // Union - {"\"int\"", "I", {"100", nullptr}, R"([ "long", "int"])", "U1I", {"100", nullptr}, 1, 1}, - - {R"([ "long", "int"])", "U1I", {"100", nullptr}, "\"int\"", "I", {"100", nullptr}, 1, 1}, + { + R"("int")", + "I", + {"100", nullptr}, + R"(["long", "int"])", + "U1I", + {"100", nullptr}, + 1}, + + {R"(["long", "int"])", + "U1I", + {"100", nullptr}, + R"("int")", + "I", + {"100", nullptr}, + 1}, // Arrray of unions - {R"({"type":"array", "items":[ "long", "int"]})", - "[c2sU1IsU1I]", - {"100", "100", nullptr}, - R"({"type":"array", "items": "int"})", - "[c2sIsI]", - {"100", "100", nullptr}, - 2, - 1}, + { + R"({"type": "array", "items": ["long", "int"]})", + "[c2sU1IsU1I]", + {"100", "100", nullptr}, + R"({"type":"array", "items": "int"})", + "[c2sIsI]", + {"100", "100", nullptr}, + 2}, // Map of unions - {R"({"type":"map", "values":[ "long", "int"]})", - "{c2sS10U1IsS10U1I}", - {"k1", "100", "k2", "100", nullptr}, - R"({"type":"map", "values": "int"})", - "{c2sS10IsS10I}", - {"k1", "100", "k2", "100", nullptr}, - 2, - 1}, + { + R"({"type": "map", "values": ["long", "int"]})", + "{c2sS10U1IsS10U1I}", + {"k1", "100", "k2", "100", nullptr}, + R"({"type":"map", "values": "int"})", + "{c2sS10IsS10I}", + {"k1", "100", "k2", "100", nullptr}, + 2}, // Union + promotion - {"\"int\"", "I", {"100", nullptr}, R"([ "long", "string"])", "U0L", {"100", nullptr}, 1, 1}, - - {R"([ "int", "string"])", "U0I", {"100", nullptr}, "\"long\"", "L", {"100", nullptr}, 1, 1}, + { + R"("int")", + "I", + {"100", nullptr}, + R"(["long", "string"])", + "U0L", + {"100", nullptr}, + 1}, + + {R"(["int", "string"])", + "U0I", + {"100", nullptr}, + R"("long")", + "L", + {"100", nullptr}, + 1}, // Record where union field is skipped. - {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f0\", \"type\":\"boolean\"}," - "{\"name\":\"f1\", \"type\":\"int\"}," - "{\"name\":\"f2\", \"type\":[\"int\", \"long\"]}," - "{\"name\":\"f3\", \"type\":\"float\"}" - "]}", - "BIU0IF", - {"1", "100", "121", "10.75", nullptr}, - "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f0\", \"type\":\"boolean\"}," - "{\"name\":\"f1\", \"type\":\"long\"}," - "{\"name\":\"f3\", \"type\":\"double\"}]}", - "BLD", - {"1", "100", "10.75", nullptr}, - 1, - 1}, + { + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f0", "type": "boolean"}, + {"name": "f1", "type": "int"}, + {"name": "f2", "type": ["int", "long"]}, + {"name": "f3", "type": "float"} + ] + })", + "BIU0IF", + {"1", "100", "121", "10.75", nullptr}, + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f0", "type": "boolean"}, + {"name": "f1", "type": "long"}, + {"name": "f3", "type": "double"} + ] + })", + "BLD", + {"1", "100", "10.75", nullptr}, + 1}, }; static const TestData4 data4BinaryOnly[] = { // Arrray of unions - {R"({"type":"array", "items":[ "long", "int"]})", - "[c1sU1Ic1sU1I]", - {"100", "100", nullptr}, - R"({"type":"array", "items": "int"})", - "[c1sIc1sI]", - {"100", "100", nullptr}, - 2}, + { + R"({ + "type":"array", + "items": ["long", "int"] + })", + "[c1sU1Ic1sU1I]", + {"100", "100", nullptr}, + R"({"type":"array", "items": "int"})", + "[c1sIc1sI]", + {"100", "100", nullptr}, + 2}, // Map of unions - {R"({"type":"map", "values":[ "long", "int"]})", - "{c1sS10U1Ic1sS10U1I}", - {"k1", "100", "k2", "100", nullptr}, - R"({"type":"map", "values": "int"})", - "{c1sS10Ic1sS10I}", - {"k1", "100", "k2", "100", nullptr}, - 2}, + { + R"({"type":"map", "values":[ "long", "int"]})", + "{c1sS10U1Ic1sS10U1I}", + {"k1", "100", "k2", "100", nullptr}, + R"({"type":"map", "values": "int"})", + "{c1sS10Ic1sS10I}", + {"k1", "100", "k2", "100", nullptr}, + 2}, }; #define COUNTOF(x) sizeof(x) / sizeof(x[0]) @@ -1524,13 +1847,13 @@ Test testWithData(const Test &test, const Data &) { testWithData(&testFunc, data), data, data + COUNTOF(data))) struct BinaryEncoderFactory { - static EncoderPtr newEncoder(const ValidSchema &schema) { + static EncoderPtr newEncoder(const ValidSchema &) { return binaryEncoder(); } }; struct BinaryDecoderFactory { - static DecoderPtr newDecoder(const ValidSchema &schema) { + static DecoderPtr newDecoder(const ValidSchema &) { return binaryDecoder(); } }; @@ -1777,6 +2100,40 @@ static void testJsonCodecReinit() { } } +static void testArrayNegativeBlockCount() { + // Array of ints [10, 20, 30, 40, 50] encoded with a negative block count + // in the second block, which exercises arrayNext(). + // Per the Avro spec, a negative count means: abs(count) items follow, + // preceded by a long byte-size of the block. + // + // Block 1: count=2, items: 10, 20 (read by arrayStart) + // Block 2: count=-3, bytesize=3, items: 30, 40, 50 (read by arrayNext) + // Terminal: count=0 + const uint8_t data[] = { + 0x04, // zigzag(2) = 4: block count = 2 + 0x14, 0x28, // zigzag ints: 10, 20 + 0x05, // zigzag(-3) = 5: block count = -3 + 0x06, // zigzag(3) = 6: byte-size of block + 0x3c, 0x50, 0x64, // zigzag ints: 30, 40, 50 + 0x00 // terminal + }; + + InputStreamPtr is = memoryInputStream(data, sizeof(data)); + DecoderPtr d = binaryDecoder(); + d->init(*is); + + std::vector result; + for (size_t n = d->arrayStart(); n != 0; n = d->arrayNext()) { + for (size_t i = 0; i < n; ++i) { + result.push_back(d->decodeInt()); + } + } + + const std::vector expected = {10, 20, 30, 40, 50}; + BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), + expected.begin(), expected.end()); +} + static void testByteCount() { OutputStreamPtr os1 = memoryOutputStream(); EncoderPtr e1 = binaryEncoder(); @@ -1791,7 +2148,7 @@ static void testByteCount() { } // namespace avro boost::unit_test::test_suite * -init_unit_test_suite(int argc, char *argv[]) { +init_unit_test_suite(int, char *[]) { using namespace boost::unit_test; auto *ts = BOOST_TEST_SUITE("Avro C++ unit tests for codecs"); @@ -1802,6 +2159,7 @@ init_unit_test_suite(int argc, char *argv[]) { ts->add(BOOST_PARAM_TEST_CASE(&avro::testJson, avro::jsonData, ENDOF(avro::jsonData))); ts->add(BOOST_TEST_CASE(avro::testJsonCodecReinit)); + ts->add(BOOST_TEST_CASE(avro::testArrayNegativeBlockCount)); ts->add(BOOST_TEST_CASE(avro::testByteCount)); return ts; diff --git a/lang/c++/test/CommonsSchemasTests.cc b/lang/c++/test/CommonsSchemasTests.cc new file mode 100644 index 00000000000..a373fe841ce --- /dev/null +++ b/lang/c++/test/CommonsSchemasTests.cc @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#include "Compiler.hh" +#include "DataFile.hh" +#include "Generic.hh" +#include "ValidSchema.hh" +#include +#include +#include + +using avro::DataFileReader; +using avro::DataFileWriter; +using avro::GenericDatum; +using avro::GenericReader; +using avro::validatingDecoder; + +void testCommonSchema(const std::filesystem::path &dir_path) { + const std::filesystem::path &schemaFile = dir_path / "schema.json"; + std::ifstream in(schemaFile.c_str()); + + avro::ValidSchema schema; + avro::compileJsonSchema(in, schema); + + const std::filesystem::path &dataFile = dir_path / "data.avro"; + + GenericDatum datum(schema); + const std::filesystem::path &outputDataFile = dir_path / "data_out.avro"; + + DataFileReader reader(dataFile.string().c_str()); + DataFileWriter writer(outputDataFile.string().c_str(), schema); + + while (reader.read(datum)) { + datum.value(); + writer.write(datum); + } + writer.close(); + reader.close(); + + GenericDatum datumOrig(schema); + GenericDatum datumNew(schema); + + DataFileReader readerOrig(dataFile.string().c_str()); + DataFileReader readerNew(outputDataFile.string().c_str()); + while (readerOrig.read(datumOrig)) { + BOOST_CHECK(readerNew.read(datumNew)); + avro::GenericRecord &rec1 = datumOrig.value(); + avro::GenericRecord &rec2 = datumNew.value(); + BOOST_CHECK_EQUAL(rec1.fieldCount(), rec2.fieldCount()); + } + BOOST_CHECK(!readerNew.read(datumNew)); + + readerNew.close(); + readerOrig.close(); + + std::filesystem::remove(outputDataFile); +} + +void testCommonsSchemas() { + const std::filesystem::path commons_schemas{"../../share/test/data/schemas"}; + if (!std::filesystem::exists(commons_schemas)) { + std::cout << "\nWarn: Can't access share test folder '../../share/test/data/schemas'\n" + << std::endl; + return; + } + for (auto const &dir_entry : std::filesystem::directory_iterator{commons_schemas}) { + if (std::filesystem::is_directory(dir_entry)) { + testCommonSchema(dir_entry.path()); + } + } +} + +boost::unit_test::test_suite * +init_unit_test_suite(int /*argc*/, char * /*argv*/[]) { + using namespace boost::unit_test; + + auto *ts = BOOST_TEST_SUITE("Avro C++ unit tests for commons schemas"); + ts->add(BOOST_TEST_CASE(&testCommonsSchemas)); + return ts; +} diff --git a/lang/c++/test/CompilerTests.cc b/lang/c++/test/CompilerTests.cc index e3d4426a3c6..072b9775666 100644 --- a/lang/c++/test/CompilerTests.cc +++ b/lang/c++/test/CompilerTests.cc @@ -18,10 +18,11 @@ #include -#include +#include #include #include "Compiler.hh" +#include "Node.hh" #include "ValidSchema.hh" // Assert that empty defaults don't make json schema compilation violate bounds @@ -82,6 +83,66 @@ void test2dArray() { BOOST_CHECK_EQUAL(expected, actual.str()); } +void testRecordWithNamedReference() { + std::string nestedSchema = "{\"name\":\"NestedRecord\",\"type\":\"record\",\"fields\":[{\"name\":\"stringField\",\"type\":\"string\"}]}"; + // The root schema references the nested schema above by name only. + // This mimics tools that allow schemas to have references to other schemas. + std::string rootSchema = "{\"name\":\"RootRecord\",\"type\":\"record\",\"fields\":[{\"name\": \"nestedField\",\"type\":\"NestedRecord\"}]}"; + + // First compile the nested schema + avro::ValidSchema nestedRecord = avro::compileJsonSchemaFromString(nestedSchema); + + // Create a map of named references + std::map namedReferences; + namedReferences[avro::Name("NestedRecord")] = nestedRecord; + + // Parse the root schema with named references + std::istringstream rootSchemaStream(rootSchema); + avro::ValidSchema rootRecord = avro::compileJsonSchemaWithNamedReferences(rootSchemaStream, namedReferences); + + // Verify the schema was compiled correctly + BOOST_CHECK_EQUAL("RootRecord", rootRecord.root()->name().simpleName()); + + // Get the nested field and verify its type + const avro::NodePtr &rootNode = rootRecord.root(); + BOOST_CHECK_EQUAL(avro::AVRO_RECORD, rootNode->type()); + BOOST_CHECK_EQUAL(1, rootNode->leaves()); + + const avro::NodePtr &nestedFieldNode = rootNode->leafAt(0); + BOOST_CHECK_EQUAL("NestedRecord", nestedFieldNode->name().simpleName()); +} + +// Verify recursive schemas don't create shared_ptr cycles by ensuring the +// root node expires once the ValidSchema goes out of scope. Example: binary +// tree node with left/right as union of null and the node type itself. +void testRecursiveBinaryTreeWeakPtrExpires() { + std::weak_ptr weakRoot; + + { + const std::string schema = R"({ + "type": "record", + "name": "Node", + "fields": [ + {"name": "value", "type": "int"}, + {"name": "left", "type": ["null", "Node"], "default": null}, + {"name": "right", "type": ["null", "Node"], "default": null} + ] + })"; + + avro::ValidSchema s = avro::compileJsonSchemaFromString(schema); + // Capture a weak reference to the root node while the schema is alive. + weakRoot = s.root(); + + // Optionally exercise the schema to ensure validation completed. + BOOST_CHECK_EQUAL(avro::AVRO_RECORD, s.root()->type()); + BOOST_CHECK_EQUAL("Node", s.root()->name().simpleName()); + } + + // After the ValidSchema (and any strong references) go out of scope, + // the weak pointer must not be lockable if there are no cycles. + BOOST_CHECK(weakRoot.expired()); +} + boost::unit_test::test_suite * init_unit_test_suite(int /*argc*/, char * /*argv*/[]) { using namespace boost::unit_test; @@ -89,5 +150,7 @@ init_unit_test_suite(int /*argc*/, char * /*argv*/[]) { auto *ts = BOOST_TEST_SUITE("Avro C++ unit tests for Compiler.cc"); ts->add(BOOST_TEST_CASE(&testEmptyBytesDefault)); ts->add(BOOST_TEST_CASE(&test2dArray)); + ts->add(BOOST_TEST_CASE(&testRecordWithNamedReference)); + ts->add(BOOST_TEST_CASE(&testRecursiveBinaryTreeWeakPtrExpires)); return ts; } diff --git a/lang/c++/test/DataFileTests.cc b/lang/c++/test/DataFileTests.cc index fec7f316a4c..ed4e93c0663 100644 --- a/lang/c++/test/DataFileTests.cc +++ b/lang/c++/test/DataFileTests.cc @@ -16,14 +16,14 @@ * limitations under the License. */ -#include #include #include #include -#include +#include #include #include +#include #include #include @@ -123,7 +123,7 @@ static ValidSchema makeValidSchema(const char *schema) { istringstream iss(schema); ValidSchema vs; compileJsonSchema(iss, vs); - return ValidSchema(vs); + return vs; } static const char sch[] = "{\"type\": \"record\"," @@ -199,7 +199,7 @@ class DataFileTest { using Pair = pair; void testCleanup() { - BOOST_CHECK(boost::filesystem::remove(filename)); + BOOST_CHECK(std::filesystem::remove(filename)); } void testWrite() { @@ -216,8 +216,14 @@ class DataFileTest { } #endif +#ifdef ZSTD_CODEC_AVAILABLE + void testWriteWithZstdCodec() { + testWriteWithCodec(avro::ZSTD_CODEC); + } +#endif + void testWriteWithCodec(avro::Codec codec) { - avro::DataFileWriter df(filename, writerSchema, 100); + avro::DataFileWriter df(filename, writerSchema, 100, codec); int64_t re = 3; int64_t im = 5; for (int i = 0; i < count; ++i, re *= im, im += 3) { @@ -278,12 +284,12 @@ class DataFileTest { void testTruncate() { testWriteDouble(); - uintmax_t size = boost::filesystem::file_size(filename); + uintmax_t size = std::filesystem::file_size(filename); { avro::DataFileWriter df(filename, writerSchema, 100); df.close(); } - uintmax_t new_size = boost::filesystem::file_size(filename); + uintmax_t new_size = std::filesystem::file_size(filename); BOOST_CHECK(size > new_size); } @@ -405,7 +411,7 @@ class DataFileTest { } std::set> actual; int num = 0; - for (int i = sync_points.size() - 2; i >= 0; --i) { + for (ssize_t i = sync_points.size() - 2; i >= 0; --i) { df.seek(sync_points[i]); ComplexInteger ci; // Subtract avro::SyncSize here because sync and pastSync @@ -471,9 +477,7 @@ class DataFileTest { void testReaderSplits() { boost::mt19937 random(static_cast(time(nullptr))); avro::DataFileReader df(filename, writerSchema); - std::ifstream just_for_length( - filename, std::ifstream::ate | std::ifstream::binary); - int length = just_for_length.tellg(); + int length = static_cast(std::filesystem::file_size(filename)); int splits = 10; int end = length; // end of split int remaining = end; // bytes remaining @@ -575,7 +579,7 @@ class DataFileTest { } { avro::DataFileReader reader(filename, dschema); - std::vector found; + std::vector found; ComplexInteger record; while (reader.read(record)) { found.push_back(record.re); @@ -620,6 +624,39 @@ class DataFileTest { } #endif +#ifdef ZSTD_CODEC_AVAILABLE + void testZstd() { + // Add enough objects to span multiple blocks + const size_t number_of_objects = 1000000; + // first create a large file + ValidSchema dschema = avro::compileJsonSchemaFromString(sch); + { + avro::DataFileWriter writer( + filename, dschema, 16 * 1024, avro::ZSTD_CODEC); + + for (size_t i = 0; i < number_of_objects; ++i) { + ComplexInteger d; + d.re = i; + d.im = 2 * i; + writer.write(d); + } + } + { + avro::DataFileReader reader(filename, dschema); + std::this_thread::sleep_for(std::chrono::seconds(1)); + std::vector found; + ComplexInteger record; + while (reader.read(record)) { + found.push_back(record.re); + } + BOOST_CHECK_EQUAL(found.size(), number_of_objects); + for (unsigned int i = 0; i < found.size(); ++i) { + BOOST_CHECK_EQUAL(found[i], i); + } + } + } +#endif + void testSchemaReadWrite() { uint32_t a = 42; { @@ -658,6 +695,81 @@ class DataFileTest { BOOST_CHECK_EQUAL(root->leafAt(5)->getDoc(), "extra slashes\\\\"); } } + + void testClosedReader() { + const auto isNonSeekableInputStreamError = [](const avro::Exception &e) { return e.what() == std::string("seek not supported on non-SeekableInputStream"); }; + + avro::DataFileReader df(filename, writerSchema); + df.close(); + ComplexDouble unused; + BOOST_CHECK(!df.read(unused)); // closed stream can't be read + BOOST_CHECK_EQUAL(df.previousSync(), 0ul); // closed stream always returns begin position + BOOST_CHECK(df.pastSync(10l)); // closed stream always point after position // closed stream always returns begin position + BOOST_CHECK_EQUAL(df.previousSync(), 0u); // closed stream always point at position 0 // closed stream always returns begin position + BOOST_CHECK_EXCEPTION(df.sync(10l), avro::Exception, isNonSeekableInputStreamError); // closed stream always returns begin position + BOOST_CHECK_EXCEPTION(df.seek(10l), avro::Exception, isNonSeekableInputStreamError); // closed stream always returns begin position + } + + void testClosedWriter() { + avro::DataFileWriter df(filename, writerSchema); + df.close(); + ComplexDouble unused; + BOOST_CHECK_NO_THROW(df.write(unused)); // write has not effect on closed stream + } + + void testMetadata() { + avro::Metadata customMetadata; + std::string key1 = "author"; + std::string value1 = "test-user"; + customMetadata[key1] = std::vector(value1.begin(), value1.end()); + + std::string key2 = "version"; + std::string value2 = "1.0.0"; + customMetadata[key2] = std::vector(value2.begin(), value2.end()); + + std::string key3 = "description"; + std::string value3 = "Test file with custom metadata"; + customMetadata[key3] = std::vector(value3.begin(), value3.end()); + + // Write data with custom metadata + { + avro::DataFileWriter df(filename, writerSchema, 100, avro::NULL_CODEC, customMetadata); + int64_t re = 10; + int64_t im = 20; + for (int i = 0; i < 5; ++i, re += 5, im += 10) { + ComplexInteger c(re, im); + df.write(c); + } + df.close(); + } + + // Read and verify metadata + { + avro::DataFileReader df(filename, writerSchema); + const avro::Metadata &readMetadata = df.metadata(); + + // Check that our custom metadata is present + auto it1 = readMetadata.find(key1); + BOOST_CHECK(it1 != readMetadata.end()); + BOOST_CHECK_EQUAL(std::string(it1->second.begin(), it1->second.end()), value1); + + auto it2 = readMetadata.find(key2); + BOOST_CHECK(it2 != readMetadata.end()); + BOOST_CHECK_EQUAL(std::string(it2->second.begin(), it2->second.end()), value2); + + auto it3 = readMetadata.find(key3); + BOOST_CHECK(it3 != readMetadata.end()); + BOOST_CHECK_EQUAL(std::string(it3->second.begin(), it3->second.end()), value3); + + // Check that standard metadata is also present + auto schemaIt = readMetadata.find("avro.schema"); + BOOST_CHECK(schemaIt != readMetadata.end()); + + auto codecIt = readMetadata.find("avro.codec"); + BOOST_CHECK(codecIt != readMetadata.end()); + BOOST_CHECK_EQUAL(std::string(codecIt->second.begin(), codecIt->second.end()), "null"); + } + } }; void addReaderTests(test_suite *ts, const shared_ptr &t) { @@ -696,7 +808,7 @@ struct codec_traits { if (auto *rd = dynamic_cast(&d)) { const std::vector fo = rd->fieldOrder(); - for (unsigned long it : fo) { + for (const auto it : fo) { switch (it) { case 0: { avro::decode(d, v.s2); @@ -773,6 +885,88 @@ void testSkipStringSnappyCodec() { } #endif +#ifdef ZSTD_CODEC_AVAILABLE +void testSkipStringZstdCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testSkipString(avro::ZSTD_CODEC); +} +#endif + +struct Weather { + std::string station; + int64_t time; + int32_t temp; + Weather(const char *station, int64_t time, int32_t temp) + : station(station), time(time), temp(temp) {} + + bool operator==(const Weather &other) const { + return station == other.station && time == other.time && temp == other.temp; + } + friend std::ostream &operator<<(std::ostream &os, const Weather &w) { + return os << w.station << ' ' << w.time << ' ' << w.temp; + } +}; + +namespace avro { +template<> +struct codec_traits { + static void decode(Decoder &d, Weather &v) { + avro::decode(d, v.station); + avro::decode(d, v.time); + avro::decode(d, v.temp); + } +}; +} // namespace avro + +void testCompatibility(const char *filename) { + const char *readerSchemaStr = "{" + "\"type\": \"record\", \"name\": \"test.Weather\", \"fields\":[" + "{\"name\": \"station\", \"type\": \"string\", \"order\": \"ignore\"}," + "{\"name\": \"time\", \"type\": \"long\"}," + "{\"name\": \"temp\", \"type\": \"int\"}" + "]}"; + avro::ValidSchema readerSchema = + avro::compileJsonSchemaFromString(readerSchemaStr); + avro::DataFileReader df(filename, readerSchema); + + Weather ro("", -1, -1); + BOOST_CHECK_EQUAL(df.read(ro), true); + BOOST_CHECK_EQUAL(ro, Weather("011990-99999", -619524000000L, 0)); + BOOST_CHECK_EQUAL(df.read(ro), true); + BOOST_CHECK_EQUAL(ro, Weather("011990-99999", -619506000000L, 22)); + BOOST_CHECK_EQUAL(df.read(ro), true); + BOOST_CHECK_EQUAL(ro, Weather("011990-99999", -619484400000L, -11)); + BOOST_CHECK_EQUAL(df.read(ro), true); + BOOST_CHECK_EQUAL(ro, Weather("012650-99999", -655531200000L, 111)); + BOOST_CHECK_EQUAL(df.read(ro), true); + BOOST_CHECK_EQUAL(ro, Weather("012650-99999", -655509600000L, 78)); + BOOST_CHECK_EQUAL(df.read(ro), false); +} + +void testCompatibilityNullCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testCompatibility("../../share/test/data/weather.avro"); +} + +void testCompatibilityDeflateCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testCompatibility("../../share/test/data/weather-deflate.avro"); +} + +#ifdef SNAPPY_CODEC_AVAILABLE +void testCompatibilitySnappyCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testCompatibility("../../share/test/data/weather-snappy.avro"); +} +#endif + +#ifdef ZSTD_CODEC_AVAILABLE +void testCompatibilityZstdCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testCompatibility("../../share/test/data/weather-zstd.avro"); +} +#endif + struct TestRecord { std::string s1; int64_t id; @@ -948,7 +1142,7 @@ void testReadRecordEfficientlyUsingLastSync(avro::Codec codec) { std::unique_ptr inputStream = avro::memoryInputStream(stitchedData.data(), stitchedData.size()); - int recordsUptoRecordToRead = recordToRead - recordsUptoLastSync; + size_t recordsUptoRecordToRead = recordToRead - recordsUptoLastSync; // Ensure this is not the first record in the chunk. BOOST_CHECK_GT(recordsUptoRecordToRead, 0); @@ -956,7 +1150,7 @@ void testReadRecordEfficientlyUsingLastSync(avro::Codec codec) { avro::DataFileReader df(std::move(inputStream)); TestRecord readRecord("", 0); //::printf("\nReading %d rows until specific record is reached", recordsUptoRecordToRead); - for (int index = 0; index < recordsUptoRecordToRead; index++) { + for (size_t index = 0; index < recordsUptoRecordToRead; index++) { BOOST_CHECK_EQUAL(df.read(readRecord), true); int64_t expectedId = (recordToRead - recordsUptoRecordToRead + index); @@ -986,6 +1180,13 @@ void testLastSyncSnappyCodec() { } #endif +#ifdef ZSTD_CODEC_AVAILABLE +void testLastSyncZstdCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testLastSync(avro::ZSTD_CODEC); +} +#endif + void testReadRecordEfficientlyUsingLastSyncNullCodec() { BOOST_TEST_CHECKPOINT(__func__); testReadRecordEfficientlyUsingLastSync(avro::NULL_CODEC); @@ -1003,8 +1204,301 @@ void testReadRecordEfficientlyUsingLastSyncSnappyCodec() { } #endif +#ifdef ZSTD_CODEC_AVAILABLE +void testReadRecordEfficientlyUsingLastSyncZstdCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testReadRecordEfficientlyUsingLastSync(avro::ZSTD_CODEC); +} +#endif + +void testMetadataWithCodec(avro::Codec codec) { + const char *filename = "test_metadata_codec.df"; + avro::ValidSchema schema = avro::compileJsonSchemaFromString(sch); + + avro::Metadata customMetadata; + std::string key1 = "test.key1"; + std::string value1 = "test-value-1"; + customMetadata[key1] = std::vector(value1.begin(), value1.end()); + + std::string key2 = "test.key2"; + std::string value2 = "test-value-2-with-special-chars: !@#$%^&*()"; + customMetadata[key2] = std::vector(value2.begin(), value2.end()); + + // Write data with custom metadata + { + avro::DataFileWriter writer(filename, schema, 100, codec, customMetadata); + for (int i = 0; i < 10; ++i) { + ComplexInteger c(i * 2, i * 3); + writer.write(c); + } + writer.close(); + } + + // Read and verify metadata + { + avro::DataFileReader reader(filename, schema); + const avro::Metadata &readMetadata = reader.metadata(); + + // Verify custom metadata + auto it1 = readMetadata.find(key1); + BOOST_CHECK(it1 != readMetadata.end()); + BOOST_CHECK_EQUAL(std::string(it1->second.begin(), it1->second.end()), value1); + + auto it2 = readMetadata.find(key2); + BOOST_CHECK(it2 != readMetadata.end()); + BOOST_CHECK_EQUAL(std::string(it2->second.begin(), it2->second.end()), value2); + + // Verify standard metadata + auto schemaIt = readMetadata.find("avro.schema"); + BOOST_CHECK(schemaIt != readMetadata.end()); + + auto codecIt = readMetadata.find("avro.codec"); + BOOST_CHECK(codecIt != readMetadata.end()); + } + + // Clean up + std::filesystem::remove(filename); +} + +void testMetadataWithNullCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testMetadataWithCodec(avro::NULL_CODEC); +} + +void testMetadataWithDeflateCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testMetadataWithCodec(avro::DEFLATE_CODEC); +} + +#ifdef SNAPPY_CODEC_AVAILABLE +void testMetadataWithSnappyCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testMetadataWithCodec(avro::SNAPPY_CODEC); +} +#endif + +#ifdef ZSTD_CODEC_AVAILABLE +void testMetadataWithZstdCodec() { + BOOST_TEST_CHECKPOINT(__func__); + testMetadataWithCodec(avro::ZSTD_CODEC); +} +#endif + +void testDeflateCompressionLevelValidation() { + BOOST_TEST_CHECKPOINT(__func__); + + avro::ValidSchema schema = avro::compileJsonSchemaFromString(sch); + const char *filename = "test_deflate_level.df"; + + boost::mt19937 rng(static_cast(time(nullptr))); + boost::random::uniform_int_distribution<> dist(-100, 100); + + for (int i = 0; i < 100; ++i) { + int level = dist(rng); + bool isValidLevel = (level >= 0 && level <= 9); + + if (isValidLevel) { + // Valid levels should succeed + BOOST_CHECK_NO_THROW({ + avro::DataFileWriter writer( + filename, schema, 16 * 1024, avro::DEFLATE_CODEC, {}, level); + writer.close(); + }); + } else { + // Invalid levels should throw + BOOST_CHECK_THROW({ avro::DataFileWriter writer( + filename, schema, 16 * 1024, avro::DEFLATE_CODEC, {}, level); }, avro::Exception); + } + } + + BOOST_CHECK_NO_THROW({ + avro::DataFileWriter writer( + filename, schema, 16 * 1024, avro::DEFLATE_CODEC, {}, std::nullopt); + writer.close(); + }); + + std::filesystem::remove(filename); +} + +#ifdef ZSTD_CODEC_AVAILABLE +void testZstdCompressionLevelValidation() { + BOOST_TEST_CHECKPOINT(__func__); + + avro::ValidSchema schema = avro::compileJsonSchemaFromString(sch); + const char *filename = "test_zstd_level.df"; + + boost::mt19937 rng(static_cast(time(nullptr))); + boost::random::uniform_int_distribution<> dist(-100, 100); + + for (int i = 0; i < 100; ++i) { + int level = dist(rng); + bool isValidLevel = (level >= 1 && level <= 22); + + if (isValidLevel) { + // Valid levels should succeed + BOOST_CHECK_NO_THROW({ + avro::DataFileWriter writer( + filename, schema, 16 * 1024, avro::ZSTD_CODEC, {}, level); + writer.close(); + }); + } else { + // Invalid levels should throw + BOOST_CHECK_THROW({ avro::DataFileWriter writer( + filename, schema, 16 * 1024, avro::ZSTD_CODEC, {}, level); }, avro::Exception); + } + } + + BOOST_CHECK_NO_THROW({ + avro::DataFileWriter writer( + filename, schema, 16 * 1024, avro::ZSTD_CODEC, {}, std::nullopt); + writer.close(); + }); + + std::filesystem::remove(filename); +} +#endif + +void testDeflateCompressionRoundTrip() { + BOOST_TEST_CHECKPOINT(__func__); + + avro::ValidSchema schema = avro::compileJsonSchemaFromString(sch); + const char *filename = "test_deflate_roundtrip.df"; + + boost::mt19937 rng(static_cast(time(nullptr))); + boost::random::uniform_int_distribution<> levelDist(0, 10); // 0-9 valid, 10 = nullopt + boost::random::uniform_int_distribution<> dataDist(1, 1000); + + for (int i = 0; i < 100; ++i) { + int rawLevel = levelDist(rng); + std::optional level = (rawLevel == 10) ? std::nullopt : std::optional(rawLevel); + int numRecords = dataDist(rng) % 100 + 1; + + std::vector originalData; + int64_t re = rng(); + int64_t im = rng(); + for (int j = 0; j < numRecords; ++j) { + originalData.emplace_back(re, im); + re = re * 31 + im; + im = im * 17 + re; + } + + // Write with compression level + { + avro::DataFileWriter writer( + filename, schema, 16 * 1024, avro::DEFLATE_CODEC, {}, level); + for (const auto &record : originalData) { + writer.write(record); + } + writer.close(); + } + + // Read back and verify + { + avro::DataFileReader reader(filename, schema); + std::vector readData; + ComplexInteger record; + while (reader.read(record)) { + readData.push_back(record); + } + + BOOST_CHECK_EQUAL(readData.size(), originalData.size()); + for (size_t j = 0; j < originalData.size() && j < readData.size(); ++j) { + BOOST_CHECK_EQUAL(readData[j].re, originalData[j].re); + BOOST_CHECK_EQUAL(readData[j].im, originalData[j].im); + } + } + } + + std::filesystem::remove(filename); +} + +#ifdef ZSTD_CODEC_AVAILABLE +void testZstdCompressionRoundTrip() { + BOOST_TEST_CHECKPOINT(__func__); + + avro::ValidSchema schema = avro::compileJsonSchemaFromString(sch); + const char *filename = "test_zstd_roundtrip.df"; + + boost::mt19937 rng(static_cast(time(nullptr))); + // Valid ZSTD levels: 1-22 + boost::random::uniform_int_distribution<> levelDist(0, 22); // 0 = nullopt, 1-22 = valid levels + boost::random::uniform_int_distribution<> dataDist(1, 1000); + + for (int i = 0; i < 100; ++i) { + int rawLevel = levelDist(rng); + std::optional level = (rawLevel == 0) ? std::nullopt : std::optional(rawLevel); + int numRecords = dataDist(rng) % 100 + 1; + + std::vector originalData; + int64_t re = rng(); + int64_t im = rng(); + for (int j = 0; j < numRecords; ++j) { + originalData.emplace_back(re, im); + re = re * 31 + im; + im = im * 17 + re; + } + + // Write with compression level + { + avro::DataFileWriter writer( + filename, schema, 16 * 1024, avro::ZSTD_CODEC, {}, level); + for (const auto &record : originalData) { + writer.write(record); + } + writer.close(); + } + + // Read back and verify + { + avro::DataFileReader reader(filename, schema); + std::vector readData; + ComplexInteger record; + while (reader.read(record)) { + readData.push_back(record); + } + + BOOST_CHECK_EQUAL(readData.size(), originalData.size()); + for (size_t j = 0; j < originalData.size() && j < readData.size(); ++j) { + BOOST_CHECK_EQUAL(readData[j].re, originalData[j].re); + BOOST_CHECK_EQUAL(readData[j].im, originalData[j].im); + } + } + } + + std::filesystem::remove(filename); +} +#endif + +void testCodecEnumValues() { + BOOST_TEST_CHECKPOINT(__func__); + + BOOST_CHECK_EQUAL(static_cast(avro::NULL_CODEC), 0); + BOOST_CHECK_EQUAL(static_cast(avro::DEFLATE_CODEC), 1); + BOOST_CHECK_EQUAL(static_cast(avro::SNAPPY_CODEC), 2); + BOOST_CHECK_EQUAL(static_cast(avro::ZSTD_CODEC), 3); +} + +void testIsCodecAvailable() { + BOOST_TEST_CHECKPOINT(__func__); + + BOOST_CHECK_EQUAL(avro::isCodecAvailable(avro::NULL_CODEC), true); + BOOST_CHECK_EQUAL(avro::isCodecAvailable(avro::DEFLATE_CODEC), true); + +#ifdef SNAPPY_CODEC_AVAILABLE + BOOST_CHECK_EQUAL(avro::isCodecAvailable(avro::SNAPPY_CODEC), true); +#else + BOOST_CHECK_EQUAL(avro::isCodecAvailable(avro::SNAPPY_CODEC), false); +#endif + +#ifdef ZSTD_CODEC_AVAILABLE + BOOST_CHECK_EQUAL(avro::isCodecAvailable(avro::ZSTD_CODEC), true); +#else + BOOST_CHECK_EQUAL(avro::isCodecAvailable(avro::ZSTD_CODEC), false); +#endif +} + test_suite * -init_unit_test_suite(int argc, char *argv[]) { +init_unit_test_suite(int, char *[]) { { auto *ts = BOOST_TEST_SUITE("DataFile tests: test0.df"); shared_ptr t1(new DataFileTest("test1.d0", sch, isch, 0)); @@ -1036,6 +1530,16 @@ init_unit_test_suite(int argc, char *argv[]) { addReaderTests(ts, t1); boost::unit_test::framework::master_test_suite().add(ts); } +#endif +#ifdef ZSTD_CODEC_AVAILABLE + { + auto *ts = BOOST_TEST_SUITE("DataFile tests: test1.zstd.df"); + shared_ptr t1(new DataFileTest("test1.zstd.df", sch, isch)); + ts->add(BOOST_CLASS_TEST_CASE( + &DataFileTest::testWriteWithZstdCodec, t1)); + addReaderTests(ts, t1); + boost::unit_test::framework::master_test_suite().add(ts); + } #endif { auto *ts = BOOST_TEST_SUITE("DataFile tests: test2.df"); @@ -1082,6 +1586,9 @@ init_unit_test_suite(int argc, char *argv[]) { shared_ptr t8(new DataFileTest("test8.df", dsch, dblsch)); #ifdef SNAPPY_CODEC_AVAILABLE ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testSnappy, t8)); +#endif +#ifdef ZSTD_CODEC_AVAILABLE + ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testZstd, t8)); #endif boost::unit_test::framework::master_test_suite().add(ts); } @@ -1097,7 +1604,7 @@ init_unit_test_suite(int argc, char *argv[]) { shared_ptr t9(new DataFileTest("test9.df", sch, sch)); ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testWrite, t9)); ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testReaderSyncSeek, t9)); - //ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t9)); + ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t9)); boost::unit_test::framework::master_test_suite().add(ts); } { @@ -1125,24 +1632,89 @@ init_unit_test_suite(int argc, char *argv[]) { ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t)); boost::unit_test::framework::master_test_suite().add(ts); } + { + auto *ts = BOOST_TEST_SUITE("DataFile tests: test13.df"); + shared_ptr t(new DataFileTest("test13.df", ischWithDoc, ischWithDoc)); + ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testWrite, t)); + ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testClosedReader, t)); + ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t)); + boost::unit_test::framework::master_test_suite().add(ts); + } + { + auto *ts = BOOST_TEST_SUITE("DataFile tests: test14.df"); + shared_ptr t(new DataFileTest("test14.df", ischWithDoc, ischWithDoc)); + ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testClosedWriter, t)); + ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t)); + boost::unit_test::framework::master_test_suite().add(ts); + } + { + auto *ts = BOOST_TEST_SUITE("DataFile tests: test15.df"); + shared_ptr t(new DataFileTest("test15.df", sch, isch)); + ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testMetadata, t)); + ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t)); + boost::unit_test::framework::master_test_suite().add(ts); + } boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testSkipStringNullCodec)); boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testSkipStringDeflateCodec)); #ifdef SNAPPY_CODEC_AVAILABLE boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testSkipStringSnappyCodec)); #endif +#ifdef ZSTD_CODEC_AVAILABLE + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testSkipStringZstdCodec)); +#endif + + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testCompatibilityNullCodec)); + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testCompatibilityDeflateCodec)); +#ifdef SNAPPY_CODEC_AVAILABLE + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testCompatibilitySnappyCodec)); +#endif +#ifdef ZSTD_CODEC_AVAILABLE + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testCompatibilityZstdCodec)); +#endif boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testLastSyncNullCodec)); boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testLastSyncDeflateCodec)); #ifdef SNAPPY_CODEC_AVAILABLE boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testLastSyncSnappyCodec)); #endif +#ifdef ZSTD_CODEC_AVAILABLE + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testLastSyncZstdCodec)); +#endif boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testReadRecordEfficientlyUsingLastSyncNullCodec)); boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testReadRecordEfficientlyUsingLastSyncDeflateCodec)); #ifdef SNAPPY_CODEC_AVAILABLE boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testReadRecordEfficientlyUsingLastSyncSnappyCodec)); #endif +#ifdef ZSTD_CODEC_AVAILABLE + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testReadRecordEfficientlyUsingLastSyncZstdCodec)); +#endif + + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testMetadataWithNullCodec)); + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testMetadataWithDeflateCodec)); +#ifdef SNAPPY_CODEC_AVAILABLE + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testMetadataWithSnappyCodec)); +#endif +#ifdef ZSTD_CODEC_AVAILABLE + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testMetadataWithZstdCodec)); +#endif + + // Codec enum and isCodecAvailable tests + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testCodecEnumValues)); + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testIsCodecAvailable)); + + // Compression level validation property tests + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testDeflateCompressionLevelValidation)); +#ifdef ZSTD_CODEC_AVAILABLE + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testZstdCompressionLevelValidation)); +#endif + + // Compression round-trip property tests + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testDeflateCompressionRoundTrip)); +#ifdef ZSTD_CODEC_AVAILABLE + boost::unit_test::framework::master_test_suite().add(BOOST_TEST_CASE(&testZstdCompressionRoundTrip)); +#endif return nullptr; } diff --git a/lang/c++/test/JsonTests.cc b/lang/c++/test/JsonTests.cc index 3832e691317..afaa20f1ac4 100644 --- a/lang/c++/test/JsonTests.cc +++ b/lang/c++/test/JsonTests.cc @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -65,9 +65,13 @@ TestData stringData[] = { {R"("\U000a")", EntityType::String, "\n", R"("\n")"}, {R"("\u000a")", EntityType::String, "\n", R"("\n")"}, {R"("\"")", EntityType::String, "\"", R"("\"")"}, - {R"("\/")", EntityType::String, "/", R"("\/")"}, + // While a solidus may be escaped according to the JSON standard, it need not be escaped. + {R"("/\/")", EntityType::String, "//", R"("//")"}, + {R"("\b\f\n\r\t")", EntityType::String, "\b\f\n\r\t", R"("\b\f\n\r\t")"}, {R"("\u20ac")", EntityType::String, "\xe2\x82\xac", R"("\u20ac")"}, {R"("\u03c0")", EntityType::String, "\xcf\x80", R"("\u03c0")"}, + {R"("hello\n")", EntityType::String, "hello\n", R"("hello\n")"}, + {R"("\Ud8ab\udccd")", EntityType::String, "\xf0\xba\xb3\x8d", R"("\ud8ab\udccd")"}, }; void testBool(const TestData &d) { diff --git a/lang/c++/test/LargeSchemaTests.cc b/lang/c++/test/LargeSchemaTests.cc index a12974c0f75..d99e70988ae 100644 --- a/lang/c++/test/LargeSchemaTests.cc +++ b/lang/c++/test/LargeSchemaTests.cc @@ -21,7 +21,7 @@ #include "ValidSchema.hh" #include -#include +#include #include void testLargeSchema() { diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc old mode 100755 new mode 100644 index 3195eabd003..2aa39d4146e --- a/lang/c++/test/SchemaTests.cc +++ b/lang/c++/test/SchemaTests.cc @@ -18,9 +18,11 @@ #include "Compiler.hh" #include "GenericDatum.hh" +#include "NodeImpl.hh" #include "ValidSchema.hh" -#include +#include +#include #include #include @@ -28,14 +30,14 @@ namespace avro { namespace schema { const char *basicSchemas[] = { - "\"null\"", - "\"boolean\"", - "\"int\"", - "\"long\"", - "\"float\"", - "\"double\"", - "\"bytes\"", - "\"string\"", + R"("null")", + R"("boolean")", + R"("int")", + R"("long")", + R"("float")", + R"("double")", + R"("bytes")", + R"("string")", // Primitive types - longer R"({ "type": "null" })", @@ -48,51 +50,105 @@ const char *basicSchemas[] = { R"({ "type": "string" })", // Record - R"({"type":"record","name":"Test","doc":"Doc_string","fields":[]})", - "{\"type\":\"record\",\"name\":\"Test\",\"fields\":" - "[{\"name\":\"f\",\"type\":\"long\"}]}", - "{\"type\":\"record\",\"name\":\"Test\",\"fields\":" - "[{\"name\":\"f1\",\"type\":\"long\",\"doc\":\"field_doc\"}," - "{\"name\":\"f2\",\"type\":\"int\"}]}", - "{\"type\":\"error\",\"name\":\"Test\",\"fields\":" - "[{\"name\":\"f1\",\"type\":\"long\"}," - "{\"name\":\"f2\",\"type\":\"int\"}]}", - + R"({ + "type":"record", + "name":"Test", + "doc":"Doc_string", + "fields":[] + })", + R"({ + "type":"record", + "name":"Test", + "fields": [ + {"name":"f","type":"long"} + ] + })", + R"({ + "type":"record", + "name":"Test", + "fields":[ + {"name":"f1","type":"long","doc":"field_doc"}, + {"name":"f2","type":"int"} + ] + })", + R"({ + "type":"error", + "name":"Test", + "fields":[ + {"name":"f1","type":"long"}, + {"name":"f2","type":"int"} + ] + })", // Recursive. - "{\"type\":\"record\",\"name\":\"LongList\"," - "\"fields\":[{\"name\":\"value\",\"type\":\"long\",\"doc\":\"recursive_doc\"}," - "{\"name\":\"next\",\"type\":[\"LongList\",\"null\"]}]}", + R"({ + "type":"record", + "name":"LongList", + "fields":[ + {"name":"value","type":"long","doc":"recursive_doc"}, + {"name":"next","type":["LongList","null"]} + ] + })", + // Enum - R"({"type":"enum","doc":"enum_doc","name":"Test","symbols":["A","B"]})", + R"({ + "type":"enum", + "doc":"enum_doc", + "name":"Test", + "symbols":["A","B"] + })", // Array - R"({"type":"array","doc":"array_doc","items":"long"})", - "{\"type\":\"array\",\"items\":{\"type\":\"enum\"," - "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}", + R"({ + "type":"array", + "doc":"array_doc", + "items":"long" + })", + R"({ + "type":"array", + "items":{ + "type":"enum", + "name":"Test", + "symbols":["A","B"] + } + })", // Map R"({"type":"map","doc":"map_doc","values":"long"})", - "{\"type\":\"map\",\"values\":{\"type\":\"enum\", " - "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}", + R"({ + "type":"map", + "values":{ + "type":"enum", + "name":"Test", + "symbols":["A","B"] + } + })", // Union R"(["string","null","long"])", // Fixed R"({"type":"fixed","doc":"fixed_doc","name":"Test","size":1})", - "{\"type\":\"fixed\",\"name\":\"MyFixed\"," - "\"namespace\":\"org.apache.hadoop.avro\",\"size\":1}", + R"({"type":"fixed","name":"MyFixed","namespace":"org.apache.hadoop.avro","size":1})", R"({"type":"fixed","name":"Test","size":1})", R"({"type":"fixed","name":"Test","size":1})", // Extra attributes (should be ignored) R"({"type": "null", "extra attribute": "should be ignored"})", R"({"type": "boolean", "extra1": 1, "extra2": 2, "extra3": 3})", - "{\"type\": \"record\",\"name\": \"Test\",\"fields\": " - "[{\"name\": \"f\",\"type\": \"long\"}], \"extra attribute\": 1}", - "{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"]," - "\"extra attribute\": 1}", + R"({ + "type": "record", + "name": "Test", + "fields":[ + {"name": "f","type":"long"} + ], + "extra attribute": 1 + })", + R"({"type": "enum", "name": "Test", "symbols": ["A", "B"],"extra attribute": 1})", + R"({"type": "array", "items": "long", "extra attribute": "1"})", R"({"type": "array", "items": "long", "extra attribute": 1})", + R"({"type": "array", "items": "long", "extra attribute": true})", + R"({"type": "array", "items": "long", "extra attribute": 1.1})", + R"({"type": "array", "items": "long", "extra attribute": {"extra extra attribute": "1"}})", R"({"type": "map", "values": "long", "extra attribute": 1})", R"({"type": "fixed", "name": "Test", "size": 1, "extra attribute": 1})", @@ -103,9 +159,30 @@ const char *basicSchemas[] = { R"({ "name":"test", "type": "record", "fields": [ {"name": "double","type": "double","default" : 1.2 }]})", // namespace with '$' in it. - "{\"type\":\"record\",\"name\":\"Test\",\"namespace\":\"a.b$\",\"fields\":" - "[{\"name\":\"f\",\"type\":\"long\"}]}", -}; + R"({ + "type":"record", + "name":"Test", + "namespace":"a.b$", + "fields":[ + {"name":"f","type":"long"} + ] + })", + + // Custom attribute(s) for field in record + R"({ + "type": "record", + "name": "Test", + "fields":[ + {"name": "f1","type": "long","extra field": "1"} + ] + })", + R"({ + "type": "record", + "name": "Test", + "fields":[ + {"name": "f1","type": "long","extra field1": "1","extra field2": "2"} + ] + })"}; const char *basicSchemaErrors[] = { // Record @@ -115,30 +192,33 @@ const char *basicSchemaErrors[] = { R"({"type":"record","name":"LongList", "fields": "hi"})", // Undefined name - "{\"type\":\"record\",\"name\":\"LongList\"," - "\"fields\":[{\"name\":\"value\",\"type\":\"long\"}," - "{\"name\":\"next\",\"type\":[\"LongListA\",\"null\"]}]}", + R"({ + "type":"record", + "name":"LongList", + "fields":[ + {"name":"value","type":"long"}, + {"name":"next","type":["LongListA","null"]} + ] + })", // Enum // Symbols not an array - "{\"type\": \"enum\", \"name\": \"Status\", \"symbols\": " - "\"Normal Caution Critical\"}", + R"({"type": "enum", "name": "Status", "symbols":"Normal Caution Critical"})", // Name not a string - "{\"type\": \"enum\", \"name\": [ 0, 1, 1, 2, 3, 5, 8 ], " - "\"symbols\": [\"Golden\", \"Mean\"]}", + R"({"type": "enum", "name": [ 0, 1, 1, 2, 3, 5, 8 ], "symbols": ["Golden", "Mean"]})", // No name - "{\"type\": \"enum\", \"symbols\" : [\"I\", \"will\", " - "\"fail\", \"no\", \"name\"]}", + R"({"type": "enum", "symbols" : ["I", "will", "fail", "no", "name"]})", // Duplicate symbol - "{\"type\": \"enum\", \"name\": \"Test\"," - "\"symbols\" : [\"AA\", \"AA\"]}", + R"({"type": "enum", "name": "Test", "symbols" : ["AA", "AA"]})", // Union // Duplicate type R"(["string", "long", "long"])", // Duplicate type - "[{\"type\": \"array\", \"items\": \"long\"}, " - "{\"type\": \"array\", \"items\": \"string\"}]", + R"([ + {"type": "array", "items": "long"}, + {"type": "array", "items": "string"} + ])", // Fixed // No size @@ -155,54 +235,90 @@ const char *basicSchemaErrors[] = { }; const char *roundTripSchemas[] = { - "\"null\"", - "\"boolean\"", - "\"int\"", - "\"long\"", - "\"float\"", - "\"double\"", - "\"bytes\"", - "\"string\"", + R"("null")", + R"("boolean")", + R"("int")", + R"("long")", + R"("float")", + R"("double")", + R"("bytes")", + R"("string")", + // Record R"({"type":"record","name":"Test","fields":[]})", - "{\"type\":\"record\",\"name\":\"Test\",\"fields\":" - "[{\"name\":\"f\",\"type\":\"long\"}]}", - "{\"type\":\"record\",\"name\":\"Test\",\"fields\":" - "[{\"name\":\"f1\",\"type\":\"long\"}," - "{\"name\":\"f2\",\"type\":\"int\"}]}", + R"({ + "type":"record", + "name":"Test", + "fields":[ + {"name":"f","type":"long"} + ] + })", + R"({ + "type":"record", + "name":"Test", + "fields":[ + {"name":"f1","type":"long"}, + {"name":"f2","type":"int"} + ] + })", + /* Avro-C++ cannot do a round-trip on error schemas. - * "{\"type\":\"error\",\"name\":\"Test\",\"fields\":" - * "[{\"name\":\"f1\",\"type\":\"long\"}," - * "{\"name\":\"f2\",\"type\":\"int\"}]}" - */ + * R"({ + * "type":"error", + * "name":"Test", + * "fields":[ + * {"name":"f1","type":"long"}, + * {"name":"f2","type":"int"} + * ] + * })", + */ + // Recursive. - "{\"type\":\"record\",\"name\":\"LongList\"," - "\"fields\":[{\"name\":\"value\",\"type\":\"long\"}," - "{\"name\":\"next\",\"type\":[\"LongList\",\"null\"]}]}", + R"({ + "type":"record", + "name":"LongList", + "fields":[ + {"name":"value","type":"long"}, + {"name":"next","type":["LongList","null"]} + ] + })", + // Enum R"({"type":"enum","name":"Test","symbols":["A","B"]})", // Array R"({"type":"array","items":"long"})", - "{\"type\":\"array\",\"items\":{\"type\":\"enum\"," - "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}", + R"({ + "type":"array", + "items":{ + "type":"enum", + "name":"Test", + "symbols":["A","B"] + } + })", // Map R"({"type":"map","values":"long"})", - "{\"type\":\"map\",\"values\":{\"type\":\"enum\"," - "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}", + R"({ + "type":"map", + "values":{ + "type":"enum", + "name":"Test", + "symbols":["A","B"] + } + })", // Union R"(["string","null","long"])", // Fixed R"({"type":"fixed","name":"Test","size":1})", - "{\"type\":\"fixed\",\"namespace\":\"org.apache.hadoop.avro\"," - "\"name\":\"MyFixed\",\"size\":1}", + R"({"type":"fixed","namespace":"org.apache.hadoop.avro","name":"MyFixed","size":1})", R"({"type":"fixed","name":"Test","size":1})", R"({"type":"fixed","name":"Test","size":1})", // Logical types + R"({"type":"bytes","logicalType":"big-decimal"})", R"({"type":"bytes","logicalType":"decimal","precision":12,"scale":6})", R"({"type":"fixed","name":"test","size":16,"logicalType":"decimal","precision":38,"scale":9})", R"({"type":"fixed","name":"test","size":129,"logicalType":"decimal","precision":310,"scale":155})", @@ -211,22 +327,60 @@ const char *roundTripSchemas[] = { R"({"type":"long","logicalType":"time-micros"})", R"({"type":"long","logicalType":"timestamp-millis"})", R"({"type":"long","logicalType":"timestamp-micros"})", + R"({"type":"long","logicalType":"timestamp-nanos"})", + R"({"type":"long","logicalType":"local-timestamp-millis"})", + R"({"type":"long","logicalType":"local-timestamp-micros"})", + R"({"type":"long","logicalType":"local-timestamp-nanos"})", R"({"type":"fixed","name":"test","size":12,"logicalType":"duration"})", R"({"type":"string","logicalType":"uuid"})", + R"({"type":"fixed","name":"test","size":16,"logicalType":"uuid"})", // namespace with '$' in it. - "{\"type\":\"record\",\"namespace\":\"a.b$\",\"name\":\"Test\",\"fields\":" - "[{\"name\":\"f\",\"type\":\"long\"}]}", + R"({ + "type":"record", + "namespace":"a.b$", + "name":"Test", + "fields":[ + {"name":"f","type":"long"} + ] + })", + + // Custom fields + R"({ + "type":"record", + "name":"Test", + "fields":[ + {"name":"f1","type":"long","extra_field":"1"}, + {"name":"f2","type":"int"} + ] + })", + R"({ + "type":"record", + "name":"Test", + "fields":[ + {"name":"f1","type":"long","extra_field":"1"}, + {"name":"f2","type":"int","extra_field1":"21","extra_field2":"22"} + ] + })", + R"({"type":"array","items":"long","extra":"1"})", + R"({"type":"map","values":"long","extra":"1"})", + R"({"type":"fixed","name":"Test","size":1,"extra":"1"})", + R"({"type":"enum","name":"Test","symbols":["A","B"],"extra":"1"})", }; const char *malformedLogicalTypes[] = { // Wrong base type. + R"({"type":"long","logicalType": "big-decimal"})", R"({"type":"long","logicalType": "decimal","precision": 10})", R"({"type":"string","logicalType":"date"})", R"({"type":"string","logicalType":"time-millis"})", R"({"type":"string","logicalType":"time-micros"})", R"({"type":"string","logicalType":"timestamp-millis"})", R"({"type":"string","logicalType":"timestamp-micros"})", + R"({"type":"string","logicalType":"timestamp-nanos"})", + R"({"type":"string","logicalType":"local-timestamp-millis"})", + R"({"type":"string","logicalType":"local-timestamp-micros"})", + R"({"type":"string","logicalType":"local-timestamp-nanos"})", R"({"type":"string","logicalType":"duration"})", R"({"type":"long","logicalType":"uuid"})", // Missing the required field 'precision'. @@ -235,7 +389,14 @@ const char *malformedLogicalTypes[] = { R"({"type":"fixed","logicalType":"decimal","size":4,"name":"a","precision":20})", R"({"type":"fixed","logicalType":"decimal","size":129,"name":"a","precision":311})", // Scale is larger than precision. - R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})"}; + R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})", + // Precision is not supported by the big-decimal logical type + // and scale is integrated in bytes. + R"({"type":"bytes","logicalType": "big-decimal","precision": 9})", + R"({"type":"bytes","logicalType": "big-decimal","scale": 2})", + R"({"type":"bytes","logicalType": "big-decimal","precision": 9,"scale": 2})", + R"({"type":"fixed","logicalType":"uuid","size":12,"name":"invalid_uuid_size"})", +}; const char *schemasToCompact[] = { // Schema without any whitespace R"({"type":"record","name":"Test","fields":[]})", @@ -263,6 +424,16 @@ const char *compactSchemas[] = { "{\"name\":\"re2\",\"type\":\"long\",\"doc\":\"extra slashes\\\\\\\\\"}" "]}"}; +static const std::vector whitespaces = {' ', '\f', '\n', '\r', '\t', '\v'}; + +static std::string removeWhitespaceFromSchema(const std::string &schema) { + std::string trimmedSchema = schema; + for (char toReplace : whitespaces) { + boost::algorithm::replace_all(trimmedSchema, std::string{toReplace}, ""); + } + return trimmedSchema; +} + void testTypes() { BOOST_CHECK_EQUAL(isAvroType(AVRO_BOOL), true); } @@ -290,13 +461,13 @@ static void testRoundTrip(const char *schema) { compileJsonSchemaFromString(std::string(schema)); std::ostringstream os; compiledSchema.toJson(os); - std::string result = os.str(); - result.erase(std::remove_if(result.begin(), result.end(), ::isspace), result.end()); // Remove whitespace - BOOST_CHECK(result == std::string(schema)); + std::string result = removeWhitespaceFromSchema(os.str()); + std::string trimmedSchema = removeWhitespaceFromSchema(schema); + BOOST_CHECK_EQUAL(result, trimmedSchema); // Verify that the compact schema from toJson has the same content as the // schema. std::string result2 = compiledSchema.toJson(false); - BOOST_CHECK(result2 == std::string(schema)); + BOOST_CHECK_EQUAL(result2, trimmedSchema); } static void testCompactSchemas() { @@ -312,49 +483,45 @@ static void testCompactSchemas() { } static void testLogicalTypes() { - const char *bytesDecimalType = "{\n\ - \"type\": \"bytes\",\n\ - \"logicalType\": \"decimal\",\n\ - \"precision\": 10,\n\ - \"scale\": 2\n\ - }"; - const char *fixedDecimalType = "{\n\ - \"type\": \"fixed\",\n\ - \"size\": 16,\n\ - \"name\": \"fixedDecimalType\",\n\ - \"logicalType\": \"decimal\",\n\ - \"precision\": 12,\n\ - \"scale\": 6\n\ - }"; - const char *dateType = "{\n\ - \"type\": \"int\", \"logicalType\": \"date\"\n\ - }"; - const char *timeMillisType = "{\n\ - \"type\": \"int\", \"logicalType\": \"time-millis\"\n\ - }"; - const char *timeMicrosType = "{\n\ - \"type\": \"long\", \"logicalType\": \"time-micros\"\n\ - }"; - const char *timestampMillisType = "{\n\ - \"type\": \"long\", \"logicalType\": \"timestamp-millis\"\n\ - }"; - const char *timestampMicrosType = "{\n\ - \"type\": \"long\", \"logicalType\": \"timestamp-micros\"\n\ - }"; - const char *durationType = "{\n\ - \"type\": \"fixed\",\n\ - \"size\": 12,\n\ - \"name\": \"durationType\",\n\ - \"logicalType\": \"duration\"\n\ - }"; - const char *uuidType = "{\n\ - \"type\": \"string\",\n\ - \"logicalType\": \"uuid\"\n\ - }"; + const char *bytesBigDecimalType = R"({ + "type": "bytes", + "logicalType": "big-decimal" + })"; + const char *bytesDecimalType = R"({ + "type": "bytes", + "logicalType": "decimal", + "precision": 10, + "scale": 2 + })"; + const char *fixedDecimalType = R"({ + "type": "fixed", + "size": 16, + "name": "fixedDecimalType", + "logicalType": "decimal", + "precision": 12, + "scale": 6 + })"; + const char *dateType = R"({"type": "int", "logicalType": "date"})"; + const char *timeMillisType = R"({"type": "int", "logicalType": "time-millis"})"; + const char *timeMicrosType = R"({"type": "long", "logicalType": "time-micros"})"; + const char *timestampMillisType = R"({"type": "long", "logicalType": "timestamp-millis"})"; + const char *timestampMicrosType = R"({"type": "long", "logicalType": "timestamp-micros"})"; + const char *timestampNanosType = R"({"type": "long", "logicalType": "timestamp-nanos"})"; + const char *localTimestampMillisType = R"({"type": "long", "logicalType": "local-timestamp-millis"})"; + const char *localTimestampMicrosType = R"({"type": "long", "logicalType": "local-timestamp-micros"})"; + const char *localTimestampNanosType = R"({"type": "long", "logicalType": "local-timestamp-nanos"})"; + const char *durationType = R"({"type": "fixed","size": 12,"name": "durationType","logicalType": "duration"})"; + const char *uuidStringType = R"({"type": "string","logicalType": "uuid"})"; + const char *uuidFixedType = R"({"type": "fixed", "size": 16, "name": "uuidFixedType", "logicalType": "uuid"})"; // AVRO-2923 Union with LogicalType - const char* unionType = "[\n\ - {\"type\":\"string\", \"logicalType\":\"uuid\"},\"null\"\n\ - ]"; + const char *unionType = R"([{"type":"string", "logicalType":"uuid"},"null"]})"; + { + BOOST_TEST_CHECKPOINT(bytesBigDecimalType); + ValidSchema schema = compileJsonSchemaFromString(bytesBigDecimalType); + BOOST_CHECK(schema.root()->type() == AVRO_BYTES); + LogicalType logicalType = schema.root()->logicalType(); + BOOST_CHECK(logicalType.type() == LogicalType::BIG_DECIMAL); + } { BOOST_TEST_CHECKPOINT(bytesDecimalType); ValidSchema schema1 = compileJsonSchemaFromString(bytesDecimalType); @@ -421,6 +588,42 @@ static void testLogicalTypes() { GenericDatum datum(schema); BOOST_CHECK(datum.logicalType().type() == LogicalType::TIMESTAMP_MICROS); } + { + BOOST_TEST_CHECKPOINT(timestampNanosType); + ValidSchema schema = compileJsonSchemaFromString(timestampNanosType); + BOOST_CHECK(schema.root()->type() == AVRO_LONG); + LogicalType logicalType = schema.root()->logicalType(); + BOOST_CHECK(logicalType.type() == LogicalType::TIMESTAMP_NANOS); + GenericDatum datum(schema); + BOOST_CHECK(datum.logicalType().type() == LogicalType::TIMESTAMP_NANOS); + } + { + BOOST_TEST_CHECKPOINT(localTimestampMillisType); + ValidSchema schema = compileJsonSchemaFromString(localTimestampMillisType); + BOOST_CHECK(schema.root()->type() == AVRO_LONG); + LogicalType logicalType = schema.root()->logicalType(); + BOOST_CHECK(logicalType.type() == LogicalType::LOCAL_TIMESTAMP_MILLIS); + GenericDatum datum(schema); + BOOST_CHECK(datum.logicalType().type() == LogicalType::LOCAL_TIMESTAMP_MILLIS); + } + { + BOOST_TEST_CHECKPOINT(localTimestampMicrosType); + ValidSchema schema = compileJsonSchemaFromString(localTimestampMicrosType); + BOOST_CHECK(schema.root()->type() == AVRO_LONG); + LogicalType logicalType = schema.root()->logicalType(); + BOOST_CHECK(logicalType.type() == LogicalType::LOCAL_TIMESTAMP_MICROS); + GenericDatum datum(schema); + BOOST_CHECK(datum.logicalType().type() == LogicalType::LOCAL_TIMESTAMP_MICROS); + } + { + BOOST_TEST_CHECKPOINT(localTimestampNanosType); + ValidSchema schema = compileJsonSchemaFromString(localTimestampNanosType); + BOOST_CHECK(schema.root()->type() == AVRO_LONG); + LogicalType logicalType = schema.root()->logicalType(); + BOOST_CHECK(logicalType.type() == LogicalType::LOCAL_TIMESTAMP_NANOS); + GenericDatum datum(schema); + BOOST_CHECK(datum.logicalType().type() == LogicalType::LOCAL_TIMESTAMP_NANOS); + } { BOOST_TEST_CHECKPOINT(durationType); ValidSchema schema = compileJsonSchemaFromString(durationType); @@ -432,14 +635,24 @@ static void testLogicalTypes() { BOOST_CHECK(datum.logicalType().type() == LogicalType::DURATION); } { - BOOST_TEST_CHECKPOINT(uuidType); - ValidSchema schema = compileJsonSchemaFromString(uuidType); + BOOST_TEST_CHECKPOINT(uuidStringType); + ValidSchema schema = compileJsonSchemaFromString(uuidStringType); BOOST_CHECK(schema.root()->type() == AVRO_STRING); LogicalType logicalType = schema.root()->logicalType(); BOOST_CHECK(logicalType.type() == LogicalType::UUID); GenericDatum datum(schema); BOOST_CHECK(datum.logicalType().type() == LogicalType::UUID); } + { + BOOST_TEST_CHECKPOINT(uuidFixedType); + ValidSchema schema = compileJsonSchemaFromString(uuidFixedType); + BOOST_CHECK(schema.root()->type() == AVRO_FIXED); + BOOST_CHECK(schema.root()->fixedSize() == 16); + LogicalType logicalType = schema.root()->logicalType(); + BOOST_CHECK(logicalType.type() == LogicalType::UUID); + GenericDatum datum(schema); + BOOST_CHECK(datum.logicalType().type() == LogicalType::UUID); + } { BOOST_TEST_CHECKPOINT(unionType); ValidSchema schema = compileJsonSchemaFromString(unionType); @@ -460,6 +673,257 @@ static void testMalformedLogicalTypes(const char *schema) { BOOST_CHECK(datum.logicalType().type() == LogicalType::NONE); } +static void testCustomLogicalType() { + // Declare a custom logical type. + struct MapLogicalType : public CustomLogicalType { + MapLogicalType() : CustomLogicalType("map") {} + }; + + // Register the custom logical type with the registry. + CustomLogicalTypeRegistry::instance().registerType("map", [](const std::string &) { + return std::make_shared(); + }); + + auto verifyCustomLogicalType = [](const ValidSchema &schema) { + auto logicalType = schema.root()->logicalType(); + BOOST_CHECK_EQUAL(logicalType.type(), LogicalType::CUSTOM); + BOOST_CHECK_EQUAL(logicalType.customLogicalType()->name(), "map"); + }; + + const std::string schema = + R"({ "type": "array", + "logicalType": "map", + "items": { + "type": "record", + "name": "k12_v13", + "fields": [ + { "name": "key", "type": "int", "field-id": 12 }, + { "name": "value", "type": "string", "field-id": 13 } + ] + } + })"; + auto compiledSchema = compileJsonSchemaFromString(schema); + verifyCustomLogicalType(compiledSchema); + + auto json = compiledSchema.toJson(); + auto parsedSchema = compileJsonSchemaFromString(json); + verifyCustomLogicalType(parsedSchema); +} + +static void testParseCustomAttributes() { + const std::string schema = R"({ + "type": "record", + "name": "my_record", + "fields": [ + { "name": "long_field", + "type": ["null", "long"], + "field-id": 1 }, + { "name": "array_field", + "type": { "type": "array", "items": "int", "element-id": 3 }, + "field-id": 2, + "extra": "1", "extra2": "2" }, + { "name": "map_field", + "type": { "type": "map", "values": "int", "key-id": 5, "value-id": 6 }, + "field-id": 4, + "extra": "foo" }, + { "name": "timestamp_field", + "type": "long", "logicalType": "timestamp-micros", "adjust-to-utc": true, + "field-id": 10, + "extra": "bar" }, + { "name": "no_custom_attributes_field", + "type": "long" } + ] + })"; + + ValidSchema compiledSchema = compileJsonSchemaFromString(schema); + const NodePtr &root = compiledSchema.root(); + BOOST_CHECK_EQUAL(root->customAttributes(), 5); + + // long_field + { + auto customAttributes = root->customAttributesAt(0); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("field-id").value(), "1"); + } + + // array_field + { + auto customAttributes = root->customAttributesAt(1); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("extra").value(), "1"); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("extra2").value(), "2"); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("field-id").value(), "2"); + + auto arrayField = root->leafAt(1); + BOOST_CHECK_EQUAL(arrayField->customAttributes(), 1); + auto arrayFieldCustomAttributes = arrayField->customAttributesAt(0); + BOOST_CHECK_EQUAL(arrayFieldCustomAttributes.getAttribute("element-id").value(), "3"); + } + + // map_field + { + auto customAttributes = root->customAttributesAt(2); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("field-id").value(), "4"); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("extra").value(), "foo"); + + auto mapField = root->leafAt(2); + BOOST_CHECK_EQUAL(mapField->customAttributes(), 1); + auto mapFieldCustomAttributes = mapField->customAttributesAt(0); + BOOST_CHECK_EQUAL(mapFieldCustomAttributes.getAttribute("key-id").value(), "5"); + BOOST_CHECK_EQUAL(mapFieldCustomAttributes.getAttribute("value-id").value(), "6"); + } + + // timestamp_field + { + auto customAttributes = root->customAttributesAt(3); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("field-id").value(), "10"); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("extra").value(), "bar"); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("adjust-to-utc").value(), "true"); + } + + // no_custom_attributes_field + { + auto customAttributes = root->customAttributesAt(4); + BOOST_CHECK_EQUAL(customAttributes.attributes().size(), 0); + } +} + +static void testAddCustomAttributes() { + auto recordNode = std::make_shared(); + + // long_field + { + CustomAttributes customAttributes; + customAttributes.addAttribute("field-id", "1"); + recordNode->addCustomAttributesForField(customAttributes); + recordNode->addLeaf(std::make_shared(AVRO_LONG)); + recordNode->addName("long_field"); + } + + // array_field + { + auto arrayField = std::make_shared(SingleLeaf(std::make_shared(AVRO_INT))); + CustomAttributes elementCustomAttributes; + elementCustomAttributes.addAttribute("element-id", "3"); + arrayField->addCustomAttributesForField(elementCustomAttributes); + + CustomAttributes customAttributes; + customAttributes.addAttribute("field-id", "2"); + customAttributes.addAttribute("extra", "1"); + customAttributes.addAttribute("extra2", "2"); + recordNode->addCustomAttributesForField(customAttributes); + recordNode->addLeaf(arrayField); + recordNode->addName("array_field"); + } + + // map_field + { + auto mapField = std::make_shared(SingleLeaf(std::make_shared(AVRO_INT))); + CustomAttributes keyValueCustomAttributes; + keyValueCustomAttributes.addAttribute("key-id", "5"); + keyValueCustomAttributes.addAttribute("value-id", "6"); + mapField->addCustomAttributesForField(keyValueCustomAttributes); + + CustomAttributes customAttributes; + customAttributes.addAttribute("field-id", "4"); + customAttributes.addAttribute("extra", "foo"); + recordNode->addCustomAttributesForField(customAttributes); + recordNode->addLeaf(mapField); + recordNode->addName("map_field"); + } + + // timestamp_field + { + auto timestampField = std::make_shared(AVRO_LONG); + CustomAttributes customAttributes; + customAttributes.addAttribute("field-id", "10"); + customAttributes.addAttribute("extra", "bar"); + customAttributes.addAttribute("adjust-to-utc", "true"); + recordNode->addCustomAttributesForField(customAttributes); + recordNode->addLeaf(timestampField); + recordNode->addName("timestamp_field"); + } + + const std::string expected = R"({ + "type": "record", + "name": "", + "fields": [ + { "name": "long_field", + "type": "long", + "field-id": "1" }, + { "name": "array_field", + "type": { "type": "array", "items": "int", "element-id": "3" }, + "extra": "1", + "extra2": "2", + "field-id": "2" }, + { "name": "map_field", + "type": { "type": "map", "values": "int", "key-id": "5", "value-id": "6" }, + "extra": "foo", + "field-id": "4" }, + { "name": "timestamp_field", + "type": "long", + "adjust-to-utc": "true", + "extra": "bar", + "field-id": "10" } + ] + })"; + ValidSchema schema(recordNode); + std::string json = schema.toJson(); + BOOST_CHECK_EQUAL(removeWhitespaceFromSchema(json), removeWhitespaceFromSchema(expected)); +} + +static void testCustomAttributesJson2Schema2Json() { + const std::string schema = R"({ + "type": "record", + "name": "my_record", + "fields": [ + { "name": "long_field", "type": "long", "int_key": 1, "str_key": "1" } + ] + })"; + ValidSchema compiledSchema = compileJsonSchemaFromString(schema); + + // Verify custom attributes from parsed schema + auto customAttributes = compiledSchema.root()->customAttributesAt(0); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("int_key").value(), "1"); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("str_key").value(), "1"); + + // Verify custom attributes from json result + std::string json = compiledSchema.toJson(); + BOOST_CHECK_EQUAL(removeWhitespaceFromSchema(json), removeWhitespaceFromSchema(schema)); +} + +static void testCustomAttributesSchema2Json2Schema() { + const std::string expected = R"({ + "type": "record", + "name": "my_record", + "fields": [ + { "name": "long_field", "type": "long", "int_key": 1, "str_key": "1" } + ] + })"; + + auto recordNode = std::make_shared(); + { + CustomAttributes customAttributes; + customAttributes.addAttribute("int_key", "1", /*addQuotes=*/false); + customAttributes.addAttribute("str_key", "1", /*addQuotes=*/true); + recordNode->addCustomAttributesForField(customAttributes); + recordNode->addLeaf(std::make_shared(AVRO_LONG)); + recordNode->addName("long_field"); + recordNode->setName(Name("my_record")); + } + + // Verify custom attributes from json result + ValidSchema schema(recordNode); + std::string json = schema.toJson(); + BOOST_CHECK_EQUAL(removeWhitespaceFromSchema(json), removeWhitespaceFromSchema(expected)); + + // Verify custom attributes from parsed schema + { + auto parsedSchema = compileJsonSchemaFromString(json); + auto customAttributes = parsedSchema.root()->customAttributesAt(0); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("int_key").value(), "1"); + BOOST_CHECK_EQUAL(customAttributes.getAttribute("str_key").value(), "1"); + } +} + } // namespace schema } // namespace avro @@ -483,5 +947,10 @@ init_unit_test_suite(int /*argc*/, char * /*argv*/[]) { ADD_PARAM_TEST(ts, avro::schema::testMalformedLogicalTypes, avro::schema::malformedLogicalTypes); ts->add(BOOST_TEST_CASE(&avro::schema::testCompactSchemas)); + ts->add(BOOST_TEST_CASE(&avro::schema::testCustomLogicalType)); + ts->add(BOOST_TEST_CASE(&avro::schema::testParseCustomAttributes)); + ts->add(BOOST_TEST_CASE(&avro::schema::testAddCustomAttributes)); + ts->add(BOOST_TEST_CASE(&avro::schema::testCustomAttributesJson2Schema2Json)); + ts->add(BOOST_TEST_CASE(&avro::schema::testCustomAttributesSchema2Json2Schema)); return ts; } diff --git a/lang/c++/test/SpecificTests.cc b/lang/c++/test/SpecificTests.cc index e027f9518d2..72f2897e45b 100644 --- a/lang/c++/test/SpecificTests.cc +++ b/lang/c++/test/SpecificTests.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include +#include #include #include "Specific.hh" diff --git a/lang/c++/test/StreamTests.cc b/lang/c++/test/StreamTests.cc index 262e0600a32..d14558ecd28 100644 --- a/lang/c++/test/StreamTests.cc +++ b/lang/c++/test/StreamTests.cc @@ -18,10 +18,11 @@ #include "Exception.hh" #include "Stream.hh" -#include "boost/filesystem.hpp" -#include +#include #include +#include + namespace avro { namespace stream { @@ -51,7 +52,7 @@ struct Fill1 { StreamWriter w; w.reset(os); for (size_t i = 0; i < len; ++i) { - w.write(i % 10 + '0'); + w.write(static_cast(i % 10 + '0')); } w.flush(); } @@ -65,7 +66,7 @@ struct Fill2 { os.next(&b, &n); size_t j = 0; for (; i < len && j < n; ++j, ++i, ++b) { - *b = i % 10 + '0'; + *b = static_cast(i % 10 + '0'); } if (i == len) { os.backup(n - j); @@ -125,7 +126,7 @@ void testNonEmpty_memoryStream(const TestData &td) { void testNonEmpty2(const TestData &td) { std::vector v; for (size_t i = 0; i < td.dataSize; ++i) { - v.push_back(i % 10 + '0'); + v.push_back(static_cast(i % 10 + '0')); } uint8_t v2 = 0; @@ -136,9 +137,9 @@ void testNonEmpty2(const TestData &td) { static const char filename[] = "test_str.bin"; struct FileRemover { - const boost::filesystem::path file; + const std::filesystem::path file; explicit FileRemover(const char *fn) : file(fn) {} - ~FileRemover() { boost::filesystem::remove(file); } + ~FileRemover() { std::filesystem::remove(file); } }; template diff --git a/lang/c++/test/buffertest.cc b/lang/c++/test/buffertest.cc index 1881028593b..45634dab7ea 100644 --- a/lang/c++/test/buffertest.cc +++ b/lang/c++/test/buffertest.cc @@ -16,13 +16,10 @@ * limitations under the License. */ -#include +#include -#include +#include -#ifdef HAVE_BOOST_ASIO -#include -#endif #include "buffer/BufferPrint.hh" #include "buffer/BufferReader.hh" #include "buffer/BufferStream.hh" @@ -34,19 +31,18 @@ using detail::kMinBlockSize; using std::cout; using std::endl; +// Make a string of repeating 0123456789ABCDEF0123456789... std::string makeString(size_t len) { - std::string newstring; - newstring.reserve(len); + std::string result; + result.reserve(len); + + constexpr char chars[] = "0123456789ABCDEF"; for (size_t i = 0; i < len; ++i) { - char newchar = '0' + i % 16; - if (newchar > '9') { - newchar += 7; - } - newstring.push_back(newchar); + result.push_back(chars[i % 16]); } - return newstring; + return result; } void printBuffer(const InputBuffer &buf) { @@ -219,7 +215,7 @@ void TestDiscard() { BOOST_CHECK_EQUAL(ob.freeSpace(), kDefaultBlockSize / 2); BOOST_CHECK_EQUAL(ob.numChunks(), 1); - int chunks = 3 - (discarded / kDefaultBlockSize); + size_t chunks = 3 - (discarded / kDefaultBlockSize); BOOST_CHECK_EQUAL(ob.numDataChunks(), chunks); } @@ -331,7 +327,7 @@ void TestExtractToInput() { BOOST_CHECK_EQUAL(ob.freeSpace(), kDefaultBlockSize / 2); BOOST_CHECK_EQUAL(ob.numChunks(), 1); - int chunks = 3 - (extracted / kDefaultBlockSize); + size_t chunks = 3 - (extracted / kDefaultBlockSize); BOOST_CHECK_EQUAL(ob.numDataChunks(), chunks); } @@ -526,7 +522,7 @@ void TestSeek() { avro::InputBuffer buf(tmp1); cout << "Starting string: " << str << '\n'; - BOOST_CHECK_EQUAL(static_cast(buf.size()), str.size()); + BOOST_CHECK_EQUAL(buf.size(), str.size()); avro::istream is(buf); @@ -608,108 +604,46 @@ void TestIterator() { } } -#ifdef HAVE_BOOST_ASIO -void server(boost::barrier &b) { - using boost::asio::ip::tcp; - boost::asio::io_service io_service; - tcp::acceptor a(io_service, tcp::endpoint(tcp::v4(), 33333)); - tcp::socket sock(io_service); - a.listen(); - - b.wait(); - - a.accept(sock); - avro::OutputBuffer buf(100); - - size_t length = sock.receive(buf); - buf.wroteTo(length); - cout << "Server got " << length << " bytes\n"; - - InputBuffer rbuf(buf); - - std::string res; - - avro::InputBuffer::const_iterator iter = rbuf.begin(); - while (iter != rbuf.end()) { - res.append(boost::asio::buffer_cast(*iter), boost::asio::buffer_size(*iter)); - cout << "Received Buffer size: " << boost::asio::buffer_size(*iter) << endl; - BOOST_CHECK_EQUAL(length, boost::asio::buffer_size(*iter)); - cout << "Received Buffer: \"" << res << '"' << endl; - ++iter; - } - - BOOST_CHECK_EQUAL(res, "hello world"); -} - +// Historical context: Prior to AVRO-4178, InputBuffer and OutputBuffer iterators +// had implicit conversion operators to boost::asio::const_buffer and +// boost::asio::mutable_buffer (via ConstAsioBuffer and MutableAsioBuffer typedefs). +// These conversions were removed to eliminate the Boost::system dependency. +// This test demonstrates the recommended workaround: users should access the +// public data() and size() member functions of the dereferenced iterator instead. +// These functions provide the same underlying buffer pointer and size information +// that the ASIO conversions provided, allowing integration with any I/O library. void TestAsioBuffer() { - using boost::asio::ip::tcp; BOOST_TEST_MESSAGE("TestAsioBuffer"); { - boost::barrier b(2); - - boost::thread t(boost::bind(server, boost::ref(b))); - - b.wait(); - - // set up the thing - boost::asio::io_service io_service; - - tcp::resolver resolver(io_service); - tcp::resolver::query query(tcp::v4(), "localhost", "33333"); - tcp::resolver::iterator endpoint_iterator = resolver.resolve(query); - tcp::resolver::iterator end; - - tcp::socket socket(io_service); - boost::system::error_code error = boost::asio::error::host_not_found; - while (error && endpoint_iterator != end) { - socket.close(); - socket.connect(*endpoint_iterator++, error); - } - if (error) { - throw error; - } - std::string hello = "hello "; std::string world = "world"; + + // Create a buffer with data avro::OutputBuffer buf; buf.writeTo(hello.c_str(), hello.size()); - BOOST_CHECK_EQUAL(buf.size(), hello.size()); - avro::OutputBuffer buf2; buf2.writeTo(world.c_str(), world.size()); - BOOST_CHECK_EQUAL(buf2.size(), world.size()); buf.append(buf2); BOOST_CHECK_EQUAL(buf.size(), hello.size() + world.size()); - cout << "Distance " << std::distance(buf.begin(), buf.end()) << endl; - BOOST_CHECK_EQUAL(std::distance(buf.begin(), buf.end()), 1); - + // Convert to InputBuffer for reading const avro::InputBuffer rbuf(buf); + // Demonstrate the workaround: instead of relying on implicit ASIO conversions, + // users can access data() and size() directly from the dereferenced iterator. + std::string reconstructed; avro::InputBuffer::const_iterator iter = rbuf.begin(); while (iter != rbuf.end()) { - std::string str(boost::asio::buffer_cast(*iter), boost::asio::buffer_size(*iter)); - cout << "Buffer size: " << boost::asio::buffer_size(*iter) << endl; - cout << "Buffer: \"" << str << '"' << endl; + reconstructed.append(iter->data(), iter->size()); ++iter; } - cout << "Buffer size " << rbuf.size() << endl; - - std::size_t wrote = boost::asio::write(socket, rbuf); - cout << "Wrote " << wrote << endl; - BOOST_CHECK_EQUAL(wrote, rbuf.size()); - - t.join(); + BOOST_CHECK_EQUAL(reconstructed, "hello world"); + BOOST_CHECK_EQUAL(reconstructed.size(), rbuf.size()); } } -#else -void TestAsioBuffer() { - cout << "Skipping asio test\n"; -} -#endif // HAVE_BOOST_ASIO void TestSplit() { BOOST_TEST_MESSAGE("TestSplit"); diff --git a/lang/c++/test/testgentest.cc b/lang/c++/test/testgentest.cc index 3d86329ce08..c204dd99633 100644 --- a/lang/c++/test/testgentest.cc +++ b/lang/c++/test/testgentest.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include +#include #include #include #include diff --git a/lang/c++/test/unittest.cc b/lang/c++/test/unittest.cc index 82b92f520da..b0cb44c5b9f 100644 --- a/lang/c++/test/unittest.cc +++ b/lang/c++/test/unittest.cc @@ -16,8 +16,10 @@ * limitations under the License. */ -#include +#include #include +#include +#include #include "Compiler.hh" #include "Decoder.hh" @@ -36,6 +38,10 @@ #include "buffer/BufferStream.hh" #include "AvroSerialize.hh" +#include "CustomAttributes.hh" +#include "NodeConcepts.hh" +#include "NodeImpl.hh" +#include "Types.hh" using namespace avro; @@ -67,7 +73,19 @@ struct TestSchema { void buildSchema() { RecordSchema record("RootRecord"); - record.addField("mylong", LongSchema()); + CustomAttributes customAttributeLong; + customAttributeLong.addAttribute("extra_info_mylong", std::string("it's a long field")); + // Validate that adding a custom attribute with same name is not allowed + bool caught = false; + try { + customAttributeLong.addAttribute("extra_info_mylong", std::string("duplicate")); + } catch (Exception &e) { + std::cout << "(intentional) exception: " << e.what() << '\n'; + caught = true; + } + BOOST_CHECK_EQUAL(caught, true); + // Add custom attribute for the field + record.addField("mylong", LongSchema(), customAttributeLong); IntSchema intSchema; avro::MapSchema map = MapSchema(IntSchema()); @@ -85,7 +103,7 @@ struct TestSchema { myenum.addSymbol("two"); myenum.addSymbol("three"); - bool caught = false; + caught = false; try { myenum.addSymbol("three"); } catch (Exception &e) { @@ -121,7 +139,12 @@ struct TestSchema { } BOOST_CHECK_EQUAL(caught, true); - record.addField("mylong2", LongSchema()); + CustomAttributes customAttributeLong2; + customAttributeLong2.addAttribute("extra_info_mylong2", + std::string("it's a long field")); + customAttributeLong2.addAttribute("more_info_mylong2", + std::string("it's still a long field")); + record.addField("mylong2", LongSchema(), customAttributeLong2); record.addField("anotherint", intSchema); @@ -251,7 +274,7 @@ struct TestSchema { out << is.rdbuf(); } - void printNext(Parser &p) { + void printNext(Parser &) { // no-op printer } @@ -387,6 +410,93 @@ struct TestSchema { readData(p); } + void testNodeRecord(const NodeRecord &nodeRecord, + const std::string &expectedJson) { + BOOST_CHECK_EQUAL(nodeRecord.isValid(), true); + + std::ostringstream oss; + nodeRecord.printJson(oss, 0); + std::string actual = oss.str(); + actual.erase(std::remove_if(actual.begin(), actual.end(), + ::isspace), + actual.end()); + + std::string expected = expectedJson; + expected.erase(std::remove_if(expected.begin(), expected.end(), + ::isspace), + expected.end()); + + BOOST_CHECK_EQUAL(actual, expected); + } + + // Create NodeRecord with custom attributes at field level + // validate json serialization + void checkNodeRecordWithCustomAttribute() { + Name recordName("Test"); + HasName nameConcept(recordName); + concepts::MultiAttribute fieldNames; + std::vector> fieldAliases; + concepts::MultiAttribute fieldValues; + std::vector defaultValues; + concepts::MultiAttribute customAttributes; + + CustomAttributes cf; + cf.addAttribute("stringField", std::string("\\\"field value with \\\"double quotes\\\"\\\"")); + cf.addAttribute("booleanField", std::string("true")); + cf.addAttribute("numberField", std::string("1.23")); + cf.addAttribute("nullField", std::string("null")); + cf.addAttribute("arrayField", std::string("[1]")); + cf.addAttribute("mapField", std::string("{\\\"key1\\\":\\\"value1\\\", \\\"key2\\\":\\\"value2\\\"}")); + fieldNames.add("f1"); + fieldValues.add(NodePtr(new NodePrimitive(Type::AVRO_LONG))); + customAttributes.add(cf); + + NodeRecord nodeRecordWithCustomAttribute(nameConcept, fieldValues, + fieldNames, fieldAliases, defaultValues, + customAttributes); + std::string expectedJsonWithCustomAttribute = + "{\"type\": \"record\", \"name\": \"Test\",\"fields\": " + "[{\"name\": \"f1\", \"type\": \"long\", " + "\"arrayField\": \"[1]\", " + "\"booleanField\": \"true\", " + "\"mapField\": \"{\\\"key1\\\":\\\"value1\\\", \\\"key2\\\":\\\"value2\\\"}\", " + "\"nullField\": \"null\", " + "\"numberField\": \"1.23\", " + "\"stringField\": \"\\\"field value with \\\"double quotes\\\"\\\"\"" + "}]}"; + testNodeRecord(nodeRecordWithCustomAttribute, + expectedJsonWithCustomAttribute); + } + + // Create NodeRecord without custom attributes at field level + // validate json serialization + void checkNodeRecordWithoutCustomAttribute() { + Name recordName("Test"); + HasName nameConcept(recordName); + concepts::MultiAttribute fieldNames; + concepts::MultiAttribute fieldValues; + std::vector defaultValues; + + fieldNames.add("f1"); + fieldValues.add(NodePtr(new NodePrimitive(Type::AVRO_LONG))); + + NodeRecord nodeRecordWithoutCustomAttribute(nameConcept, fieldValues, + fieldNames, defaultValues); + std::string expectedJsonWithoutCustomAttribute = + "{\"type\": \"record\", \"name\": \"Test\",\"fields\": " + "[{\"name\": \"f1\", \"type\": \"long\"}]}"; + testNodeRecord(nodeRecordWithoutCustomAttribute, + expectedJsonWithoutCustomAttribute); + } + + void checkCustomAttributes_getAttribute() { + CustomAttributes cf; + cf.addAttribute("field1", std::string("1")); + + BOOST_CHECK_EQUAL(std::string("1"), *cf.getAttribute("field1")); + BOOST_CHECK_EQUAL(false, cf.getAttribute("not_existing").has_value()); + } + void test() { std::cout << "Before\n"; schema_.toJson(std::cout); @@ -408,6 +518,10 @@ struct TestSchema { readValidatedData(); createExampleSchema(); + + checkNodeRecordWithoutCustomAttribute(); + checkNodeRecordWithCustomAttribute(); + checkCustomAttributes_getAttribute(); } ValidSchema schema_; @@ -917,8 +1031,8 @@ struct TestResolution { }; void testNestedArraySchema() { - ArraySchema b0 = ArraySchema(NullSchema()); - ArraySchema a0 = ArraySchema(b0); + ArraySchema b0{NullSchema()}; + ArraySchema a0 = b0; avro::ValidSchema vs(a0); std::ostringstream actual; @@ -935,8 +1049,8 @@ void testNestedArraySchema() { } void testNestedMapSchema() { - MapSchema b0 = MapSchema(NullSchema()); - MapSchema a0 = MapSchema(b0); + MapSchema b0{NullSchema()}; + MapSchema a0 = b0; avro::ValidSchema vs(a0); std::ostringstream actual; diff --git a/lang/c/CMakeLists.txt b/lang/c/CMakeLists.txt index aa923e1829a..49e2a36bc8f 100644 --- a/lang/c/CMakeLists.txt +++ b/lang/c/CMakeLists.txt @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. # -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(AvroC C) enable_testing() @@ -151,7 +151,7 @@ endif (ZLIB_FOUND) find_package(Snappy) if (SNAPPY_FOUND AND ZLIB_FOUND) # Snappy borrows crc32 from zlib - set(SNAPPY_PKG libsnappy) + set(SNAPPY_PKG snappy) add_definitions(-DSNAPPY_CODEC) include_directories(${SNAPPY_INCLUDE_DIRS}) message("Enabled snappy codec") @@ -176,16 +176,16 @@ else (LZMA_FOUND) endif (LZMA_FOUND) set(CODEC_LIBRARIES ${ZLIB_LIBRARIES} ${LZMA_LIBRARIES} ${SNAPPY_LIBRARIES}) -set(CODEC_PKG "@ZLIB_PKG@ @LZMA_PKG@ @SNAPPY_PKG@") +set(CODEC_PKG "${ZLIB_PKG} ${LZMA_PKG} ${SNAPPY_PKG}") # Jansson JSON library -pkg_check_modules(JANSSON jansson>=2.3) +pkg_check_modules(JANSSON jansson>=2.7) if (JANSSON_FOUND) set(JANSSON_PKG libjansson) include_directories(${JANSSON_INCLUDE_DIRS}) link_directories(${JANSSON_LIBRARY_DIRS}) else (JANSSON_FOUND) - message(FATAL_ERROR "libjansson >=2.3 not found") + message(FATAL_ERROR "libjansson >=2.7 not found") endif (JANSSON_FOUND) diff --git a/lang/c/build.sh b/lang/c/build.sh index 6753e778dc2..5464ef3fd23 100755 --- a/lang/c/build.sh +++ b/lang/c/build.sh @@ -69,6 +69,11 @@ do make -C $build_dir test ;; + docs) + prepare_build + make -C $build_dir docs + ;; + dist) prepare_build cp ../../share/VERSION.txt $root_dir diff --git a/lang/c/cmake_avrolib.bat b/lang/c/cmake_avrolib.bat index 76934bca205..40e8b39e3d7 100644 --- a/lang/c/cmake_avrolib.bat +++ b/lang/c/cmake_avrolib.bat @@ -1,48 +1,48 @@ -REM Licensed to the Apache Software Foundation (ASF) under one -REM or more contributor license agreements. See the NOTICE file -REM distributed with this work for additional information -REM regarding copyright ownership. The ASF licenses this file -REM to you under the Apache License, Version 2.0 (the -REM "License"); you may not use this file except in compliance -REM with the License. You may obtain a copy of the License at -REM -REM https://www.apache.org/licenses/LICENSE-2.0 -REM -REM Unless required by applicable law or agreed to in writing, -REM software distributed under the License is distributed on an -REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -REM KIND, either express or implied. See the License for the -REM specific language governing permissions and limitations -REM under the License. - -echo off - -REM Set up the solution file in Windows. - -set my_cmake_path="put_your_cmake_path_here" -set cmake_path_win7="C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -set cmake_path_xp="C:\Program Files\CMake 2.8\bin\cmake.exe" - -if exist %my_cmake_path% ( - set cmake_path=%my_cmake_path% - goto RUN_CMAKE -) - -if exist %cmake_path_win7% ( - set cmake_path=%cmake_path_win7% - goto RUN_CMAKE -) - -if exist %cmake_path_xp% ( - set cmake_path=%cmake_path_xp% - goto RUN_CMAKE -) - -echo "Set the proper cmake path in the variable 'my_cmake_path' in cmake_windows.bat, and re-run" -goto EXIT_ERROR - -:RUN_CMAKE -%cmake_path% -G"Visual Studio 9 2008" -H. -Bbuild_win32 - - -:EXIT_ERROR +REM Licensed to the Apache Software Foundation (ASF) under one +REM or more contributor license agreements. See the NOTICE file +REM distributed with this work for additional information +REM regarding copyright ownership. The ASF licenses this file +REM to you under the Apache License, Version 2.0 (the +REM "License"); you may not use this file except in compliance +REM with the License. You may obtain a copy of the License at +REM +REM https://www.apache.org/licenses/LICENSE-2.0 +REM +REM Unless required by applicable law or agreed to in writing, +REM software distributed under the License is distributed on an +REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +REM KIND, either express or implied. See the License for the +REM specific language governing permissions and limitations +REM under the License. + +echo off + +REM Set up the solution file in Windows. + +set my_cmake_path="put_your_cmake_path_here" +set cmake_path_win7="C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" +set cmake_path_xp="C:\Program Files\CMake 2.8\bin\cmake.exe" + +if exist %my_cmake_path% ( + set cmake_path=%my_cmake_path% + goto RUN_CMAKE +) + +if exist %cmake_path_win7% ( + set cmake_path=%cmake_path_win7% + goto RUN_CMAKE +) + +if exist %cmake_path_xp% ( + set cmake_path=%cmake_path_xp% + goto RUN_CMAKE +) + +echo "Set the proper cmake path in the variable 'my_cmake_path' in cmake_windows.bat, and re-run" +goto EXIT_ERROR + +:RUN_CMAKE +%cmake_path% -G"Visual Studio 9 2008" -H. -Bbuild_win32 + + +:EXIT_ERROR diff --git a/lang/c/docs/index.txt b/lang/c/docs/index.txt index a439a052631..86f67bc8f96 100644 --- a/lang/c/docs/index.txt +++ b/lang/c/docs/index.txt @@ -117,7 +117,7 @@ This section provides an overview of the methods that you can call on an interface, but not all of them make sense for all Avro schema types. For instance, you won't be able to call +avro_value_set_boolean+ on an Avro array value. If you try to call an inappropriate method, we'll -return an +EINVAL+ error code. +return an +EINVAL+/+AVRO_INVALID+ error code. Note that the functions in this section apply to _all_ Avro values, regardless of which value implementation is used under the covers. This @@ -178,7 +178,7 @@ different versions of the Avro library. That means that it's really only safe to use these hash values internally within the context of a single execution of a single application. -The +reset+ method “clears out” an +avro_value_t instance, making sure +The +reset+ method “clears out” an +avro_value_t+ instance, making sure that it's ready to accept the contents of a new value. For scalars, this is usually a no-op, since the new value will just overwrite the old one. For arrays and maps, this removes any existing elements from the diff --git a/lang/c/examples/quickstop.c b/lang/c/examples/quickstop.c index ff9e9700590..b26dad10c04 100644 --- a/lang/c/examples/quickstop.c +++ b/lang/c/examples/quickstop.c @@ -107,7 +107,7 @@ int print_person(avro_file_reader_t db, avro_schema_t reader_schema) if (rval == 0) { int64_t id; int32_t age; - int32_t *p; + const char *p; size_t size; avro_value_t id_value; avro_value_t first_value; diff --git a/lang/c/src/avro/basics.h b/lang/c/src/avro/basics.h index 368509b90c8..62c899c691a 100644 --- a/lang/c/src/avro/basics.h +++ b/lang/c/src/avro/basics.h @@ -24,6 +24,7 @@ extern "C" { #define CLOSE_EXTERN #endif +#include enum avro_type_t { AVRO_STRING, @@ -40,7 +41,8 @@ enum avro_type_t { AVRO_MAP, AVRO_ARRAY, AVRO_UNION, - AVRO_LINK + AVRO_LINK, + AVRO_INVALID = EINVAL, }; typedef enum avro_type_t avro_type_t; diff --git a/lang/c/src/avro/refcount.h b/lang/c/src/avro/refcount.h index 27369900ab9..fd431f3ccea 100644 --- a/lang/c/src/avro/refcount.h +++ b/lang/c/src/avro/refcount.h @@ -86,7 +86,10 @@ avro_refcount_dec(volatile int *refcount) * Mac OS X */ -#elif __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 +#elif __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200 + +/* macOS 10.12 deprecates OSAtomic* so we'll use the GCC/Clang branch below */ #include diff --git a/lang/c/src/codec.c b/lang/c/src/codec.c index 613a914373a..176fb21d657 100644 --- a/lang/c/src/codec.c +++ b/lang/c/src/codec.c @@ -27,6 +27,9 @@ # elif defined(_WIN32) # include # define __bswap_32 _byteswap_ulong +# elif defined(__ANDROID__) +# include +# define __bswap_32 bswap_32 # else # include # endif diff --git a/lang/c/src/consume-binary.c b/lang/c/src/consume-binary.c index 9f92799d888..5e1db20684f 100644 --- a/lang/c/src/consume-binary.c +++ b/lang/c/src/consume-binary.c @@ -322,6 +322,9 @@ avro_consume_binary(avro_reader_t reader, avro_consumer_t *consumer, void *ud) case AVRO_LINK: avro_set_error("Consumer can't consume a link schema directly"); return EINVAL; + case AVRO_INVALID: + avro_set_error("Consumer can't consume an invalid schema"); + return EINVAL; } return 0; diff --git a/lang/c/src/datafile.c b/lang/c/src/datafile.c index c9d4dfeb68e..bbcf69493b7 100644 --- a/lang/c/src/datafile.c +++ b/lang/c/src/datafile.c @@ -451,12 +451,24 @@ static int file_read_block_count(avro_file_reader_t r) "Cannot read file block count: "); check_prefix(rval, enc->read_long(r->reader, &len), "Cannot read file block size: "); + if (len < 0) { + avro_set_error("Invalid block size: %" PRId64, len); + return EINVAL; + } if (r->current_blockdata && len > r->current_blocklen) { r->current_blockdata = (char *) avro_realloc(r->current_blockdata, r->current_blocklen, len); + if (!r->current_blockdata) { + avro_set_error("Cannot allocate block buffer"); + return ENOMEM; + } r->current_blocklen = len; } else if (!r->current_blockdata) { r->current_blockdata = (char *) avro_malloc(len); + if (!r->current_blockdata && len > 0) { + avro_set_error("Cannot allocate block buffer"); + return ENOMEM; + } r->current_blocklen = len; } diff --git a/lang/c/src/datum.c b/lang/c/src/datum.c index 2c427809090..5307c7a8385 100644 --- a/lang/c/src/datum.c +++ b/lang/c/src/datum.c @@ -1086,6 +1086,7 @@ static void avro_datum_free(avro_datum_t datum) } break; case AVRO_NULL: + case AVRO_INVALID: /* Nothing allocated */ break; @@ -1094,7 +1095,7 @@ static void avro_datum_free(avro_datum_t datum) record = avro_datum_to_record(datum); avro_schema_decref(record->schema); st_foreach(record->fields_byname, - HASH_FUNCTION_CAST char_datum_free_foreach, 0); + (hash_function_foreach) char_datum_free_foreach, 0); st_free_table(record->field_order); st_free_table(record->fields_byname); avro_freet(struct avro_record_datum_t, record); @@ -1122,7 +1123,7 @@ static void avro_datum_free(avro_datum_t datum) struct avro_map_datum_t *map; map = avro_datum_to_map(datum); avro_schema_decref(map->schema); - st_foreach(map->map, HASH_FUNCTION_CAST char_datum_free_foreach, + st_foreach(map->map, (hash_function_foreach) char_datum_free_foreach, 0); st_free_table(map->map); st_free_table(map->indices_by_key); @@ -1134,7 +1135,7 @@ static void avro_datum_free(avro_datum_t datum) struct avro_array_datum_t *array; array = avro_datum_to_array(datum); avro_schema_decref(array->schema); - st_foreach(array->els, HASH_FUNCTION_CAST array_free_foreach, 0); + st_foreach(array->els, (hash_function_foreach) array_free_foreach, 0); st_free_table(array->els); avro_freet(struct avro_array_datum_t, array); } @@ -1182,7 +1183,7 @@ avro_datum_reset(avro_datum_t datum) { struct avro_array_datum_t *array; array = avro_datum_to_array(datum); - st_foreach(array->els, HASH_FUNCTION_CAST array_free_foreach, 0); + st_foreach(array->els, (hash_function_foreach) array_free_foreach, 0); st_free_table(array->els); rval = avro_init_array(array); @@ -1197,7 +1198,7 @@ avro_datum_reset(avro_datum_t datum) { struct avro_map_datum_t *map; map = avro_datum_to_map(datum); - st_foreach(map->map, HASH_FUNCTION_CAST char_datum_free_foreach, 0); + st_foreach(map->map, (hash_function_foreach) char_datum_free_foreach, 0); st_free_table(map->map); st_free_table(map->indices_by_key); st_free_table(map->keys_by_index); @@ -1216,7 +1217,7 @@ avro_datum_reset(avro_datum_t datum) record = avro_datum_to_record(datum); rval = 0; st_foreach(record->fields_byname, - HASH_FUNCTION_CAST datum_reset_foreach, (st_data_t) &rval); + (hash_function_foreach) datum_reset_foreach, (st_data_t) &rval); return rval; } diff --git a/lang/c/src/datum_equal.c b/lang/c/src/datum_equal.c index 2ef750f9bf9..7e7c9b94099 100644 --- a/lang/c/src/datum_equal.c +++ b/lang/c/src/datum_equal.c @@ -78,7 +78,7 @@ static int map_equal(struct avro_map_datum_t *a, struct avro_map_datum_t *b) if (a->map->num_entries != b->map->num_entries) { return 0; } - st_foreach(a->map, HASH_FUNCTION_CAST st_equal_foreach, (st_data_t) & args); + st_foreach(a->map, (hash_function_foreach) st_equal_foreach, (st_data_t) & args); return args.rval; } @@ -93,7 +93,7 @@ static int record_equal(struct avro_record_datum_t *a, if (a->fields_byname->num_entries != b->fields_byname->num_entries) { return 0; } - st_foreach(a->fields_byname, HASH_FUNCTION_CAST st_equal_foreach, (st_data_t) & args); + st_foreach(a->fields_byname, (hash_function_foreach) st_equal_foreach, (st_data_t) & args); return args.rval; } @@ -181,6 +181,12 @@ int avro_datum_equal(const avro_datum_t a, const avro_datum_t b) * TODO */ return 0; + case AVRO_INVALID: + /* + * Invalid datums should not be compared and returning 0 + * matches the other error conditions + */ + return 0; } return 0; } diff --git a/lang/c/src/datum_size.c b/lang/c/src/datum_size.c index 770cb655f57..3877f31384c 100644 --- a/lang/c/src/datum_size.c +++ b/lang/c/src/datum_size.c @@ -126,7 +126,7 @@ size_map(avro_writer_t writer, const avro_encoding_t * enc, if (datum->map->num_entries) { size_accum(rval, size, enc->size_long(writer, datum->map->num_entries)); - st_foreach(datum->map, HASH_FUNCTION_CAST size_map_foreach, (st_data_t) & args); + st_foreach(datum->map, (hash_function_foreach) size_map_foreach, (st_data_t) & args); size += args.size; } if (!args.rval) { @@ -271,6 +271,7 @@ static int64_t size_datum(avro_writer_t writer, const avro_encoding_t * enc, avro_datum_to_union(datum)); case AVRO_LINK: + case AVRO_INVALID: break; } diff --git a/lang/c/src/datum_skip.c b/lang/c/src/datum_skip.c index aa51d793468..e0ce561642e 100644 --- a/lang/c/src/datum_skip.c +++ b/lang/c/src/datum_skip.c @@ -196,6 +196,9 @@ int avro_skip_data(avro_reader_t reader, avro_schema_t writers_schema) avro_skip_data(reader, (avro_schema_to_link(writers_schema))->to); break; + case AVRO_INVALID: + rval = EINVAL; + break; } return rval; diff --git a/lang/c/src/datum_validate.c b/lang/c/src/datum_validate.c index d15ebdddaad..e997d306761 100644 --- a/lang/c/src/datum_validate.c +++ b/lang/c/src/datum_validate.c @@ -123,7 +123,7 @@ avro_schema_datum_validate(avro_schema_t expected_schema, avro_datum_t datum) { avro_schema_to_map(expected_schema)->values, 1 }; st_foreach(avro_datum_to_map(datum)->map, - HASH_FUNCTION_CAST schema_map_validate_foreach, + (hash_function_foreach) schema_map_validate_foreach, (st_data_t) & vst); return vst.rval; } @@ -188,6 +188,8 @@ avro_schema_datum_validate(avro_schema_t expected_schema, avro_datum_t datum) datum); } break; + case AVRO_INVALID: + return EINVAL; } return 0; } diff --git a/lang/c/src/datum_value.c b/lang/c/src/datum_value.c index a4fa55a0c9e..597d38c45bc 100644 --- a/lang/c/src/datum_value.c +++ b/lang/c/src/datum_value.c @@ -80,19 +80,7 @@ avro_datum_value_get_type(const avro_value_iface_t *iface, const void *vself) { AVRO_UNUSED(iface); const avro_datum_t self = (const avro_datum_t) vself; -#ifdef _WIN32 -#pragma message("#warning: Bug: EINVAL is not of type avro_type_t.") -#else -#warning "Bug: EINVAL is not of type avro_type_t." -#endif - /* We shouldn't use EINVAL as the return value to - * check_param(), because EINVAL (= 22) is not a valid enum - * avro_type_t. This is a structural issue -- we would need a - * different interface on all the get_type functions to fix - * this. For now, suppressing the error by casting EINVAL to - * (avro_type_t) so the code compiles under C++. - */ - check_param((avro_type_t) EINVAL, self, "datum instance"); + check_param(AVRO_INVALID, self, "datum instance"); return avro_typeof(self); } diff --git a/lang/c/src/encoding_binary.c b/lang/c/src/encoding_binary.c index 1fc5f0c9a7b..96dacea5836 100644 --- a/lang/c/src/encoding_binary.c +++ b/lang/c/src/encoding_binary.c @@ -127,6 +127,10 @@ static int read_bytes(avro_reader_t reader, char **bytes, int64_t * len) int rval; check_prefix(rval, read_long(reader, len), "Cannot read bytes length: "); + if (*len < 0) { + avro_set_error("Invalid bytes length: %" PRId64, *len); + return EINVAL; + } *bytes = (char *) avro_malloc(*len + 1); if (!*bytes) { avro_set_error("Cannot allocate buffer for bytes value"); @@ -143,6 +147,10 @@ static int skip_bytes(avro_reader_t reader) int rval; check_prefix(rval, read_long(reader, &len), "Cannot read bytes length: "); + if (len < 0) { + avro_set_error("Invalid bytes length: %" PRId64, len); + return EINVAL; + } AVRO_SKIP(reader, len); return 0; } @@ -175,6 +183,10 @@ static int read_string(avro_reader_t reader, char **s, int64_t *len) int rval; check_prefix(rval, read_long(reader, &str_len), "Cannot read string length: "); + if (str_len < 0) { + avro_set_error("Invalid string length: %" PRId64, str_len); + return EINVAL; + } *len = str_len + 1; *s = (char *) avro_malloc(*len); if (!*s) { diff --git a/lang/c/src/memoize.c b/lang/c/src/memoize.c index 933fecbd043..e3602884d0d 100644 --- a/lang/c/src/memoize.c +++ b/lang/c/src/memoize.c @@ -52,8 +52,8 @@ avro_memoize_key_hash(avro_memoize_key_t *a) static struct st_hash_type avro_memoize_hash_type = { - HASH_FUNCTION_CAST avro_memoize_key_cmp, - HASH_FUNCTION_CAST avro_memoize_key_hash + (hash_function_compare) avro_memoize_key_cmp, + (hash_function_hash) avro_memoize_key_hash }; @@ -78,7 +78,7 @@ avro_memoize_free_key(avro_memoize_key_t *key, void *result, void *dummy) void avro_memoize_done(avro_memoize_t *mem) { - st_foreach((st_table *) mem->cache, HASH_FUNCTION_CAST avro_memoize_free_key, 0); + st_foreach((st_table *) mem->cache, (hash_function_foreach) avro_memoize_free_key, 0); st_free_table((st_table *) mem->cache); memset(mem, 0, sizeof(avro_memoize_t)); } diff --git a/lang/c/src/schema.c b/lang/c/src/schema.c index 7b389002b00..a4d8e9f898a 100644 --- a/lang/c/src/schema.c +++ b/lang/c/src/schema.c @@ -126,6 +126,7 @@ static void avro_schema_free(avro_schema_t schema) case AVRO_DOUBLE: case AVRO_BOOLEAN: case AVRO_NULL: + case AVRO_INVALID: /* no memory allocated for primitives */ return; @@ -136,7 +137,7 @@ static void avro_schema_free(avro_schema_t schema) if (record->space) { avro_str_free(record->space); } - st_foreach(record->fields, HASH_FUNCTION_CAST record_free_foreach, + st_foreach(record->fields, (hash_function_foreach) record_free_foreach, 0); st_free_table(record->fields_byname); st_free_table(record->fields); @@ -151,7 +152,7 @@ static void avro_schema_free(avro_schema_t schema) if (enump->space) { avro_str_free(enump->space); } - st_foreach(enump->symbols, HASH_FUNCTION_CAST enum_free_foreach, + st_foreach(enump->symbols, (hash_function_foreach) enum_free_foreach, 0); st_free_table(enump->symbols); st_free_table(enump->symbols_byname); @@ -188,7 +189,7 @@ static void avro_schema_free(avro_schema_t schema) case AVRO_UNION:{ struct avro_union_schema_t *unionp; unionp = avro_schema_to_union(schema); - st_foreach(unionp->branches, HASH_FUNCTION_CAST union_free_foreach, + st_foreach(unionp->branches, (hash_function_foreach) union_free_foreach, 0); st_free_table(unionp->branches); st_free_table(unionp->branches_byname); @@ -876,15 +877,7 @@ static int avro_schema_from_json_t(json_t *json, avro_schema_t *schema, st_table *named_schemas, const char *parent_namespace) { -#ifdef _WIN32 - #pragma message("#warning: Bug: '0' is not of type avro_type_t.") -#else - #warning "Bug: '0' is not of type avro_type_t." -#endif - /* We should really have an "AVRO_INVALID" type in - * avro_type_t. Suppress warning below in which we set type to 0. - */ - avro_type_t type = (avro_type_t) 0; + avro_type_t type = AVRO_INVALID; unsigned int i; avro_schema_t named_type = NULL; @@ -1246,7 +1239,7 @@ avro_schema_from_json_root(json_t *root, avro_schema_t *schema) /* json_dumpf(root, stderr, 0); */ rval = avro_schema_from_json_t(root, schema, named_schemas, NULL); json_decref(root); - st_foreach(named_schemas, HASH_FUNCTION_CAST named_schema_free_foreach, 0); + st_foreach(named_schemas, (hash_function_foreach) named_schema_free_foreach, 0); st_free_table(named_schemas); return rval; } @@ -1462,7 +1455,7 @@ avro_schema_t avro_schema_copy(avro_schema_t schema) } new_schema = avro_schema_copy_root(schema, named_schemas); - st_foreach(named_schemas, HASH_FUNCTION_CAST named_schema_free_foreach, 0); + st_foreach(named_schemas, (hash_function_foreach) named_schema_free_foreach, 0); st_free_table(named_schemas); return new_schema; } @@ -1882,6 +1875,8 @@ avro_schema_to_json2(const avro_schema_t schema, avro_writer_t out, return write_union(out, avro_schema_to_union(schema), parent_namespace); case AVRO_LINK: return write_link(out, avro_schema_to_link(schema), parent_namespace); + case AVRO_INVALID: + return EINVAL; } if (is_avro_primitive(schema)) { diff --git a/lang/c/src/st.c b/lang/c/src/st.c index 27578289ecb..8437777cb92 100644 --- a/lang/c/src/st.c +++ b/lang/c/src/st.c @@ -39,8 +39,8 @@ struct st_table_entry { static int numcmp(long, long); static int numhash(long); static struct st_hash_type type_numhash = { - HASH_FUNCTION_CAST numcmp, - HASH_FUNCTION_CAST numhash + (hash_function_compare) numcmp, + (hash_function_hash) numhash }; /* @@ -48,8 +48,8 @@ static struct st_hash_type type_numhash = { */ static int strhash(const char *); static struct st_hash_type type_strhash = { - HASH_FUNCTION_CAST strcmp, - HASH_FUNCTION_CAST strhash + (hash_function_compare) strcmp, + (hash_function_hash) strhash }; static void rehash(st_table *); @@ -212,7 +212,7 @@ void st_free_table(st_table *table) } #define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ -((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) +((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (void*) (key), (void*) (ptr)->key))) #ifdef HASH_LOG #define COLLISION collision++ @@ -237,7 +237,7 @@ int st_lookup(st_table *table, register st_data_t key, st_data_t *value) unsigned int hash_val, bin_pos; register st_table_entry *ptr; - hash_val = do_hash(key, table); + hash_val = do_hash((void*) key, table); FIND_ENTRY(table, ptr, hash_val, bin_pos); if (ptr == 0) { @@ -272,7 +272,7 @@ int st_insert(register st_table *table, register st_data_t key, st_data_t value) unsigned int hash_val, bin_pos; register st_table_entry *ptr; - hash_val = do_hash(key, table); + hash_val = do_hash((void*) key, table); FIND_ENTRY(table, ptr, hash_val, bin_pos); if (ptr == 0) { @@ -288,7 +288,7 @@ void st_add_direct(st_table *table,st_data_t key,st_data_t value) { unsigned int hash_val, bin_pos; - hash_val = do_hash(key, table); + hash_val = do_hash((void*) key, table); bin_pos = hash_val % table->num_bins; ADD_DIRECT(table, key, value, hash_val, bin_pos); } @@ -363,7 +363,7 @@ int st_delete(register st_table *table,register st_data_t *key,st_data_t *value) st_table_entry *tmp; register st_table_entry *ptr; - hash_val = do_hash_bin(*key, table); + hash_val = do_hash_bin((void*) *key, table); ptr = table->bins[hash_val]; if (ptr == 0) { @@ -372,7 +372,7 @@ int st_delete(register st_table *table,register st_data_t *key,st_data_t *value) return 0; } - if (EQUAL(table, *key, ptr->key)) { + if (EQUAL(table, (void*) *key, (void*) ptr->key)) { table->bins[hash_val] = ptr->next; table->num_entries--; if (value != 0) @@ -383,7 +383,7 @@ int st_delete(register st_table *table,register st_data_t *key,st_data_t *value) } for (; ptr->next != 0; ptr = ptr->next) { - if (EQUAL(table, ptr->next->key, *key)) { + if (EQUAL(table, (void*) ptr->next->key, (void*) *key)) { tmp = ptr->next; ptr->next = ptr->next->next; table->num_entries--; @@ -403,7 +403,7 @@ int st_delete_safe(register st_table *table,register st_data_t *key,st_data_t *v unsigned int hash_val; register st_table_entry *ptr; - hash_val = do_hash_bin(*key, table); + hash_val = do_hash_bin((void*) *key, table); ptr = table->bins[hash_val]; if (ptr == 0) { @@ -413,7 +413,7 @@ int st_delete_safe(register st_table *table,register st_data_t *key,st_data_t *v } for (; ptr != 0; ptr = ptr->next) { - if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { + if ((ptr->key != never) && EQUAL(table, (void*) ptr->key, (void*) *key)) { table->num_entries--; *key = ptr->key; if (value != 0) @@ -439,11 +439,11 @@ void st_cleanup_safe(st_table *table,st_data_t never) { int num_entries = table->num_entries; - st_foreach(table, HASH_FUNCTION_CAST delete_never, never); + st_foreach(table, (hash_function_foreach) delete_never, never); table->num_entries = num_entries; } -int st_foreach(st_table *table,int (*func) (ANYARGS),st_data_t arg) +int st_foreach(st_table *table,int (*func) (void*, void*, void*),st_data_t arg) { st_table_entry *ptr, *last, *tmp; enum st_retval retval; @@ -452,7 +452,9 @@ int st_foreach(st_table *table,int (*func) (ANYARGS),st_data_t arg) for (i = 0; i < table->num_bins; i++) { last = 0; for (ptr = table->bins[i]; ptr != 0;) { - retval = (enum st_retval) (*func) (ptr->key, ptr->record, arg); + retval = (enum st_retval) (*func) ((void*) ptr->key, + (void*) ptr->record, + (void*) arg); switch (retval) { case ST_CHECK: /* check if hash is modified during * iteration */ diff --git a/lang/c/src/st.h b/lang/c/src/st.h index cf8a2249169..93da018bd9b 100644 --- a/lang/c/src/st.h +++ b/lang/c/src/st.h @@ -20,26 +20,22 @@ extern "C" { #pragma GCC visibility push(hidden) -#ifndef ANYARGS - #ifdef __cplusplus - #define ANYARGS ... - #else - #define ANYARGS - #endif -#endif - #ifdef _WIN32 - #define HASH_FUNCTION_CAST (int (__cdecl *)(ANYARGS)) + typedef int (__cdecl *hash_function_compare)(void*, void*); + typedef int (__cdecl *hash_function_hash)(void*); + typedef int (__cdecl *hash_function_foreach)(void*, void*, void*); #else - #define HASH_FUNCTION_CAST + typedef int (*hash_function_compare)(void*, void*); + typedef int (*hash_function_hash)(void*); + typedef int (*hash_function_foreach)(void*, void*, void*); #endif typedef uintptr_t st_data_t; typedef struct st_table st_table; struct st_hash_type { - int (*compare) (ANYARGS); - int (*hash) (ANYARGS); + hash_function_compare compare; + hash_function_hash hash; }; struct st_table { @@ -67,7 +63,7 @@ int st_delete _((st_table *, st_data_t *, st_data_t *)); int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t)); int st_insert _((st_table *, st_data_t, st_data_t)); int st_lookup _((st_table *, st_data_t, st_data_t *)); -int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); +int st_foreach _((st_table *, hash_function_foreach, st_data_t)); void st_add_direct _((st_table *, st_data_t, st_data_t)); void st_free_table _((st_table *)); void st_cleanup_safe _((st_table *, st_data_t)); diff --git a/lang/c/src/value-json.c b/lang/c/src/value-json.c index 53c2b3d3e42..7927c14dd84 100644 --- a/lang/c/src/value-json.c +++ b/lang/c/src/value-json.c @@ -29,7 +29,8 @@ #include "jansson.h" /* - * Converts a binary buffer into a NUL-terminated JSON UTF-8 string. + * Converts a binary buffer into a JSON UTF-8 string which is NOT + * terminated with a null byte ('\0'). * Avro bytes and fixed values are encoded in JSON as a string, and JSON * strings must be in UTF-8. For these Avro types, the JSON string is * restricted to the characters U+0000..U+00FF, which corresponds to the @@ -51,7 +52,7 @@ encode_utf8_bytes(const void *src, size_t src_len, // the range 0x80..0xff will take up two. const uint8_t *src8 = (const uint8_t *) src; - size_t utf8_len = src_len + 1; // +1 for NUL terminator + size_t utf8_len = src_len; size_t i; for (i = 0; i < src_len; i++) { if (src8[i] & 0x80) { @@ -76,8 +77,6 @@ encode_utf8_bytes(const void *src, size_t src_len, } } - *curr = '\0'; - // And we're good. *dest = dest8; *dest_len = utf8_len; @@ -127,7 +126,7 @@ avro_value_to_json_t(const avro_value_t *value) return NULL; } - json_t *result = json_string_nocheck((const char *) encoded); + json_t *result = json_stringn_nocheck((const char *) encoded, encoded_size); avro_free(encoded, encoded_size); if (result == NULL) { avro_set_error("Cannot allocate JSON bytes"); @@ -242,7 +241,7 @@ avro_value_to_json_t(const avro_value_t *value) return NULL; } - json_t *result = json_string_nocheck((const char *) encoded); + json_t *result = json_stringn_nocheck((const char *) encoded, encoded_size); avro_free(encoded, encoded_size); if (result == NULL) { avro_set_error("Cannot allocate JSON fixed"); diff --git a/lang/c/tests/CMakeLists.txt b/lang/c/tests/CMakeLists.txt index 2e84a06a31c..3200164770d 100644 --- a/lang/c/tests/CMakeLists.txt +++ b/lang/c/tests/CMakeLists.txt @@ -64,7 +64,9 @@ add_avro_executable(test_interop_data) add_avro_test_checkmem(test_data_structures) add_avro_test_checkmem(test_avro_schema) +add_avro_test_checkmem(test_avro_commons_schema) add_avro_test_checkmem(test_avro_schema_names) +add_avro_test_checkmem(test_avro_type_collision) add_avro_test_checkmem(test_avro_values) add_avro_test_checkmem(test_avro_766) add_avro_test_checkmem(test_avro_968) diff --git a/lang/c/tests/test_avro_commons_schema.c b/lang/c/tests/test_avro_commons_schema.c new file mode 100644 index 00000000000..e3751e9836a --- /dev/null +++ b/lang/c/tests/test_avro_commons_schema.c @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#include "avro.h" +#include "avro_private.h" +#include +#include +#include +#include +#ifdef _WIN32 + #include "msdirent.h" +#else + #include +#endif + +avro_writer_t avro_stderr; + +static avro_schema_t read_common_schema_test(const char *dirpath) { + char schemafilepath[1024]; + char jsontext[4096]; + + avro_schema_t schema; + int n = snprintf(schemafilepath, sizeof(schemafilepath), "%s/schema.json", dirpath); + if (n < 0) { + fprintf(stderr, "Size of dir path is too long %s !\n", dirpath); + exit(EXIT_FAILURE); + } + FILE* fp = fopen(schemafilepath, "r"); + if (!fp) { + fprintf(stderr, "can't open file %s !\n", schemafilepath); + exit(EXIT_FAILURE); + } + int rval = fread(jsontext, 1, sizeof(jsontext) - 1, fp); + fclose(fp); + jsontext[rval] = '\0'; + + int test_rval = avro_schema_from_json(jsontext, 0, &schema, NULL); + if (test_rval != 0) { + fprintf(stderr, "fail! Can' read schema from file %s\n", schemafilepath); + exit(EXIT_FAILURE); + } + return schema; +} + +static void create_writer(avro_schema_t schema, avro_file_writer_t* writer) +{ + // create / reset copy.avro file. + FILE* copyFile = fopen("./copy.avro", "w"); + if (!copyFile) { + fprintf(stderr, "can't create file copy.avro !\n"); + exit(EXIT_FAILURE); + } + fclose(copyFile); + + // create avro writer on file. + if (avro_file_writer_create("./copy.avro", schema, writer)) { + fprintf(stdout, "\nThere was an error creating db: %s", avro_strerror()); + exit(EXIT_FAILURE); + } +} + +static void read_data(const char *dirpath, avro_schema_t schema) { + char datafilepath[1024]; + int n = snprintf(datafilepath, sizeof(datafilepath), "%s/data.avro", dirpath); + if (n < 0) { + fprintf(stderr, "Size of dir path is too long %s/data.avro !\n", dirpath); + exit(EXIT_FAILURE); + } + + avro_file_reader_t reader; + avro_datum_t datum; + int rval = avro_file_reader(datafilepath, &reader); + if (rval) { + exit(EXIT_FAILURE); + } + + avro_file_writer_t writer; + create_writer(schema, &writer); + + int records_read = 0; + while ((rval = avro_file_reader_read(reader, schema, &datum)) == 0) { + records_read++; + if (avro_file_writer_append(writer, datum)) { + fprintf(stdout, "\nCan't write record: %s\n", avro_strerror()); + exit(EXIT_FAILURE); + } + + avro_datum_decref(datum); + } + fprintf(stdout, "\nExit run test OK => %d records", records_read); + remove("./copy.avro"); + fflush(stdout); + avro_file_reader_close(reader); + avro_file_writer_close(writer); +} + +static void run_tests(const char *dirpath) +{ + fprintf(stdout, "\nRun test for path '%s'", dirpath); + avro_schema_t schema = read_common_schema_test(dirpath); + read_data(dirpath, schema); + avro_schema_decref(schema); +} + + + +int main(int argc, char *argv[]) +{ + char *srcdir = "../../../share/test/data/schemas"; + AVRO_UNUSED(argc); + AVRO_UNUSED(argv); + + avro_stderr = avro_writer_file(stderr); + + DIR* dir = opendir(srcdir); + if (dir == NULL) { + fprintf(stdout, "Unable to open '%s'\n", srcdir); + fflush(stdout); + exit(EXIT_FAILURE); + } + struct dirent *dent; + do { + dent = readdir(dir); + + if (dent && dent->d_name[0] != '.' && dent->d_type == DT_DIR) { + char filepath[1024]; + snprintf(filepath, sizeof(filepath), "%s/%s", srcdir, dent->d_name); + run_tests(filepath); + } + } + while(dent != NULL); + closedir(dir); + + avro_writer_free(avro_stderr); + return EXIT_SUCCESS; +} diff --git a/lang/c/tests/test_avro_data.c b/lang/c/tests/test_avro_data.c index 1da09e6db94..3a26c67e242 100644 --- a/lang/c/tests/test_avro_data.c +++ b/lang/c/tests/test_avro_data.c @@ -181,14 +181,14 @@ static int test_string(void) static int test_bytes(void) { - char bytes[] = { 0xDE, 0xAD, 0xBE, 0xEF }; + char bytes[] = { 0xDE, 0xAD, 0x00, 0xBE, 0xEF }; avro_schema_t writer_schema = avro_schema_bytes(); avro_datum_t datum; avro_datum_t expected_datum; datum = avro_givebytes(bytes, sizeof(bytes), NULL); write_read_check(writer_schema, datum, NULL, NULL, "bytes"); - test_json(datum, "\"\\u00de\\u00ad\\u00be\\u00ef\""); + test_json(datum, "\"\\u00de\\u00ad\\u0000\\u00be\\u00ef\""); avro_datum_decref(datum); avro_schema_decref(writer_schema); @@ -613,14 +613,14 @@ static int test_union(void) static int test_fixed(void) { - char bytes[] = { 0xD, 0xA, 0xD, 0xA, 0xB, 0xA, 0xB, 0xA }; + char bytes[] = { 0xD, 0xA, 0xD, 0xA, 0xB, 0x0, 0xB, 0xA }; avro_schema_t schema = avro_schema_fixed("msg", sizeof(bytes)); avro_datum_t datum; avro_datum_t expected_datum; datum = avro_givefixed(schema, bytes, sizeof(bytes), NULL); write_read_check(schema, datum, NULL, NULL, "fixed"); - test_json(datum, "\"\\r\\n\\r\\n\\u000b\\n\\u000b\\n\""); + test_json(datum, "\"\\r\\n\\r\\n\\u000b\\u0000\\u000b\\n\""); avro_datum_decref(datum); datum = avro_givefixed(schema, NULL, sizeof(bytes), NULL); diff --git a/lang/c/tests/test_avro_type_collision.c b/lang/c/tests/test_avro_type_collision.c new file mode 100644 index 00000000000..1dda590fd77 --- /dev/null +++ b/lang/c/tests/test_avro_type_collision.c @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#include "avro.h" + +#include +#include + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic error "-Wswitch" +#endif + +#define ASSERT_NOT_AVRO_INVALID(type) \ + if (type == AVRO_INVALID) { \ + fprintf(stderr, #type " collides with AVRO_INVALID\n"); \ + exit(EXIT_FAILURE); \ + } else { \ + break; \ + } + +#define CASE_ASSERTION(type) case type: ASSERT_NOT_AVRO_INVALID(type) + +int main(void) +{ + avro_schema_t null_schema = avro_schema_null(); + avro_type_t type = avro_typeof(null_schema); + avro_schema_decref(null_schema); + + switch (type) { + CASE_ASSERTION(AVRO_STRING) + CASE_ASSERTION(AVRO_BYTES) + CASE_ASSERTION(AVRO_INT32) + CASE_ASSERTION(AVRO_INT64) + CASE_ASSERTION(AVRO_FLOAT) + CASE_ASSERTION(AVRO_DOUBLE) + CASE_ASSERTION(AVRO_BOOLEAN) + CASE_ASSERTION(AVRO_NULL) + CASE_ASSERTION(AVRO_RECORD) + CASE_ASSERTION(AVRO_ENUM) + CASE_ASSERTION(AVRO_FIXED) + CASE_ASSERTION(AVRO_MAP) + CASE_ASSERTION(AVRO_ARRAY) + CASE_ASSERTION(AVRO_UNION) + CASE_ASSERTION(AVRO_LINK) + case AVRO_INVALID: + break; + } + + return EXIT_SUCCESS; +} diff --git a/lang/c/version.sh b/lang/c/version.sh index be90c0f6329..0481bcc23e5 100755 --- a/lang/c/version.sh +++ b/lang/c/version.sh @@ -34,9 +34,9 @@ # libavro_binary_age = 0 # libavro_interface_age = 0 # -libavro_micro_version=23 +libavro_micro_version=24 libavro_interface_age=0 -libavro_binary_age=0 +libavro_binary_age=1 # IGNORE EVERYTHING ELSE FROM HERE DOWN......... if test $# != 1; then diff --git a/lang/csharp/.gitignore b/lang/csharp/.gitignore index 80304575bd8..4218bd59d51 100644 --- a/lang/csharp/.gitignore +++ b/lang/csharp/.gitignore @@ -52,5 +52,7 @@ obj/ #Test results TestResult.xml +Coverage +TestResults .vs/ diff --git a/lang/csharp/Avro.sln b/lang/csharp/Avro.sln index 68036ebdd50..729235110a0 100644 --- a/lang/csharp/Avro.sln +++ b/lang/csharp/Avro.sln @@ -13,18 +13,39 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.msbuild", "src\apache\ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.perf", "src\apache\perf\Avro.perf.csproj", "{AC4E1909-2594-4D01-9B2B-B832C07BAFE5}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Codecs", "Codecs", "{0FAEE4F6-D72F-4B18-869A-7A90BAC1280F}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.BZip2", "src\apache\codec\Avro.File.BZip2\Avro.File.BZip2.csproj", "{FFA119B2-0D60-4090-B5A6-ECA718138812}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.BZip2.Test", "src\apache\codec\Avro.File.BZip2.Test\Avro.File.BZip2.Test.csproj", "{D5ED6642-3E33-493F-9217-FE00E4885699}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.Snappy", "src\apache\codec\Avro.File.Snappy\Avro.File.Snappy.csproj", "{B15BEEDC-A371-46D0-BFF6-63FC8105B520}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.Snappy.Test", "src\apache\codec\Avro.File.Snappy.Test\Avro.File.Snappy.Test.csproj", "{AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.XZ", "src\apache\codec\Avro.File.XZ\Avro.File.XZ.csproj", "{98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.XZ.Test", "src\apache\codec\Avro.File.XZ.Test\Avro.File.XZ.Test.csproj", "{99711F8E-C5C1-4864-A51F-3317E19CAD7B}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.Zstandard", "src\apache\codec\Avro.File.Zstandard\Avro.File.Zstandard.csproj", "{8207A628-6285-4DDF-B846-C0C7ED3E3D16}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.Zstandard.Test", "src\apache\codec\Avro.File.Zstandard.Test\Avro.File.Zstandard.Test.csproj", "{04264DDD-C204-4F59-88D4-FB4C69BD80C3}" +EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{8A671DF3-BC71-4E1A-BB06-0A225799A274}" ProjectSection(SolutionItems) = preProject ..\..\.editorconfig = ..\..\.editorconfig .gitignore = .gitignore Avro.ruleset = Avro.ruleset - build.ps1 = build.ps1 build.sh = build.sh common.props = common.props + versions.props = versions.props + CodeAnalysis.src.globalconfig = CodeAnalysis.src.globalconfig + CodeAnalysis.test.globalconfig = CodeAnalysis.test.globalconfig README.md = README.md - stylecop.json = stylecop.json EndProjectSection EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.benchmark", "src\apache\benchmark\Avro.benchmark.csproj", "{29271A29-9E89-47B1-A0CA-DD6704C89570}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -87,10 +108,128 @@ Global {AC4E1909-2594-4D01-9B2B-B832C07BAFE5}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU {AC4E1909-2594-4D01-9B2B-B832C07BAFE5}.Release|Mixed Platforms.Build.0 = Release|Any CPU {AC4E1909-2594-4D01-9B2B-B832C07BAFE5}.Release|x86.ActiveCfg = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|Any CPU.Build.0 = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|x86.ActiveCfg = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|x86.Build.0 = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|Any CPU.ActiveCfg = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|Any CPU.Build.0 = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|x86.ActiveCfg = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|x86.Build.0 = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|Any CPU.Build.0 = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|x86.ActiveCfg = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|x86.Build.0 = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|Any CPU.ActiveCfg = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|Any CPU.Build.0 = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|x86.ActiveCfg = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|x86.Build.0 = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|x86.ActiveCfg = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|x86.Build.0 = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|Any CPU.Build.0 = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|x86.ActiveCfg = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|x86.Build.0 = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|x86.ActiveCfg = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|x86.Build.0 = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|Any CPU.Build.0 = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|x86.ActiveCfg = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|x86.Build.0 = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|Any CPU.Build.0 = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|x86.ActiveCfg = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|x86.Build.0 = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|Any CPU.ActiveCfg = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|Any CPU.Build.0 = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|x86.ActiveCfg = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|x86.Build.0 = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|Any CPU.Build.0 = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|x86.ActiveCfg = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|x86.Build.0 = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|Any CPU.ActiveCfg = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|Any CPU.Build.0 = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|x86.ActiveCfg = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|x86.Build.0 = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|x86.ActiveCfg = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|x86.Build.0 = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|Any CPU.Build.0 = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|x86.ActiveCfg = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|x86.Build.0 = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|Any CPU.Build.0 = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|x86.ActiveCfg = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|x86.Build.0 = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|Any CPU.ActiveCfg = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|Any CPU.Build.0 = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|x86.ActiveCfg = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|x86.Build.0 = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|Any CPU.Build.0 = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|x86.ActiveCfg = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|x86.Build.0 = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|Any CPU.ActiveCfg = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|Any CPU.Build.0 = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|x86.ActiveCfg = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {FFA119B2-0D60-4090-B5A6-ECA718138812} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {D5ED6642-3E33-493F-9217-FE00E4885699} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {B15BEEDC-A371-46D0-BFF6-63FC8105B520} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {99711F8E-C5C1-4864-A51F-3317E19CAD7B} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {8207A628-6285-4DDF-B846-C0C7ED3E3D16} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {04264DDD-C204-4F59-88D4-FB4C69BD80C3} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {ACE75CE8-16B2-4C6E-A5BE-B6F6DB5FE095} EndGlobalSection diff --git a/lang/csharp/CODECOVERAGE.md b/lang/csharp/CODECOVERAGE.md new file mode 100644 index 00000000000..c06cc5af7f1 --- /dev/null +++ b/lang/csharp/CODECOVERAGE.md @@ -0,0 +1,31 @@ + +# C# Avro Code Coverage + +The following instructions should be followed in order to create a code coverage report locally. + +1. Open a command prompt +2. Install ReportGenerator globally\ + a. Run the following command line: `dotnet tool install --global dotnet-reportgenerator-globaltool --version 5.1.4 --add-source https://www.nuget.org/packages/`\ + b. The latest version can be found at [Nuget ReportGenerator](https://www.nuget.org/packages/dotnet-reportgenerator-globaltool/) +3. Navigate to the test project `avro\lang\csharp\src\apache\test` +4. Run the following test command `dotnet test --results-directory ./TestResults --collect:"XPlat Code Coverage"` +5. Generate the report with the following command `ReportGenerator "-reports:./TestResults/*/coverage.cobertura.xml" "-targetdir:./Coverage/" -reporttypes:HTML` +6. Open Report under `avro\lang\csharp\src\apache\test\Coverage\index.html` diff --git a/lang/csharp/CodeAnalysis.src.globalconfig b/lang/csharp/CodeAnalysis.src.globalconfig new file mode 100644 index 00000000000..2537599edcb --- /dev/null +++ b/lang/csharp/CodeAnalysis.src.globalconfig @@ -0,0 +1,1585 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# !!! Original: https://github.com/dotnet/runtime/blob/main/eng/CodeAnalysis.src.globalconfig +# !!! Any overrides should be added to the end of the file + +is_global = true + +# AD0001: Analyzer threw an exception +dotnet_diagnostic.AD0001.severity = suggestion + +# BCL0001: Ensure minimum API surface is respected +dotnet_diagnostic.BCL0001.severity = warning + +# BCL0010: AppContext default value expected to be true +dotnet_diagnostic.BCL0010.severity = warning + +# BCL0011: AppContext default value defined in if statement with incorrect pattern +dotnet_diagnostic.BCL0011.severity = warning + +# BCL0012: AppContext default value defined in if statement at root of switch case +dotnet_diagnostic.BCL0012.severity = warning + +# BCL0015: Invalid P/Invoke call +dotnet_diagnostic.BCL0015.severity = none + +# BCL0020: Invalid SR.Format call +dotnet_diagnostic.BCL0020.severity = warning + +# CA1000: Do not declare static members on generic types +dotnet_diagnostic.CA1000.severity = none + +# CA1001: Types that own disposable fields should be disposable +dotnet_diagnostic.CA1001.severity = none + +# CA1002: Do not expose generic lists +dotnet_diagnostic.CA1002.severity = none + +# CA1003: Use generic event handler instances +dotnet_diagnostic.CA1003.severity = none + +# CA1005: Avoid excessive parameters on generic types +dotnet_diagnostic.CA1005.severity = none + +# CA1008: Enums should have zero value +dotnet_diagnostic.CA1008.severity = none + +# CA1010: Generic interface should also be implemented +dotnet_diagnostic.CA1010.severity = none + +# CA1012: Abstract types should not have public constructors +dotnet_diagnostic.CA1012.severity = none + +# CA1014: Mark assemblies with CLSCompliant +dotnet_diagnostic.CA1014.severity = none + +# CA1016: Mark assemblies with assembly version +dotnet_diagnostic.CA1016.severity = none + +# CA1017: Mark assemblies with ComVisible +dotnet_diagnostic.CA1017.severity = none + +# CA1018: Mark attributes with AttributeUsageAttribute +dotnet_diagnostic.CA1018.severity = warning + +# CA1019: Define accessors for attribute arguments +dotnet_diagnostic.CA1019.severity = none + +# CA1021: Avoid out parameters +dotnet_diagnostic.CA1021.severity = none + +# CA1024: Use properties where appropriate +dotnet_diagnostic.CA1024.severity = none + +# CA1027: Mark enums with FlagsAttribute +dotnet_diagnostic.CA1027.severity = none + +# CA1028: Enum Storage should be Int32 +dotnet_diagnostic.CA1028.severity = none + +# CA1030: Use events where appropriate +dotnet_diagnostic.CA1030.severity = none + +# CA1031: Do not catch general exception types +dotnet_diagnostic.CA1031.severity = none + +# CA1032: Implement standard exception constructors +dotnet_diagnostic.CA1032.severity = none + +# CA1033: Interface methods should be callable by child types +dotnet_diagnostic.CA1033.severity = none + +# CA1034: Nested types should not be visible +dotnet_diagnostic.CA1034.severity = none + +# CA1036: Override methods on comparable types +dotnet_diagnostic.CA1036.severity = none + +# CA1040: Avoid empty interfaces +dotnet_diagnostic.CA1040.severity = none + +# CA1041: Provide ObsoleteAttribute message +dotnet_diagnostic.CA1041.severity = none + +# CA1043: Use Integral Or String Argument For Indexers +dotnet_diagnostic.CA1043.severity = none + +# CA1044: Properties should not be write only +dotnet_diagnostic.CA1044.severity = none + +# CA1045: Do not pass types by reference +dotnet_diagnostic.CA1045.severity = none + +# CA1046: Do not overload equality operator on reference types +dotnet_diagnostic.CA1046.severity = none + +# CA1047: Do not declare protected member in sealed type +dotnet_diagnostic.CA1047.severity = warning + +# CA1050: Declare types in namespaces +dotnet_diagnostic.CA1050.severity = warning + +# CA1051: Do not declare visible instance fields +dotnet_diagnostic.CA1051.severity = none + +# CA1052: Static holder types should be Static or NotInheritable +dotnet_diagnostic.CA1052.severity = warning +dotnet_code_quality.CA1052.api_surface = private, internal + +# CA1054: URI-like parameters should not be strings +dotnet_diagnostic.CA1054.severity = none + +# CA1055: URI-like return values should not be strings +dotnet_diagnostic.CA1055.severity = none + +# CA1056: URI-like properties should not be strings +dotnet_diagnostic.CA1056.severity = none + +# CA1058: Types should not extend certain base types +dotnet_diagnostic.CA1058.severity = none + +# CA1060: Move pinvokes to native methods class +dotnet_diagnostic.CA1060.severity = none + +# CA1061: Do not hide base class methods +dotnet_diagnostic.CA1061.severity = none + +# CA1062: Validate arguments of public methods +dotnet_diagnostic.CA1062.severity = none + +# CA1063: Implement IDisposable Correctly +dotnet_diagnostic.CA1063.severity = none + +# CA1064: Exceptions should be public +dotnet_diagnostic.CA1064.severity = none + +# CA1065: Do not raise exceptions in unexpected locations +dotnet_diagnostic.CA1065.severity = none + +# CA1066: Implement IEquatable when overriding Object.Equals +dotnet_diagnostic.CA1066.severity = warning + +# CA1067: Override Object.Equals(object) when implementing IEquatable +dotnet_diagnostic.CA1067.severity = warning + +# CA1068: CancellationToken parameters must come last +dotnet_diagnostic.CA1068.severity = none + +# CA1069: Enums values should not be duplicated +dotnet_diagnostic.CA1069.severity = none + +# CA1070: Do not declare event fields as virtual +dotnet_diagnostic.CA1070.severity = suggestion + +# CA1200: Avoid using cref tags with a prefix +dotnet_diagnostic.CA1200.severity = suggestion + +# CA1303: Do not pass literals as localized parameters +dotnet_diagnostic.CA1303.severity = none + +# CA1304: Specify CultureInfo +dotnet_diagnostic.CA1304.severity = none + +# CA1305: Specify IFormatProvider +dotnet_diagnostic.CA1305.severity = none + +# CA1307: Specify StringComparison for clarity +dotnet_diagnostic.CA1307.severity = none + +# CA1308: Normalize strings to uppercase +dotnet_diagnostic.CA1308.severity = none + +# CA1309: Use ordinal string comparison +dotnet_diagnostic.CA1309.severity = none + +# CA1310: Specify StringComparison for correctness +dotnet_diagnostic.CA1310.severity = suggestion + +# CA1401: P/Invokes should not be visible +dotnet_diagnostic.CA1401.severity = warning + +# CA1416: Validate platform compatibility +dotnet_diagnostic.CA1416.severity = warning + +# CA1417: Do not use 'OutAttribute' on string parameters for P/Invokes +dotnet_diagnostic.CA1417.severity = warning + +# CA1418: Use valid platform string +dotnet_diagnostic.CA1418.severity = warning + +# CA1419: Provide a parameterless constructor that is as visible as the containing type for concrete types derived from 'System.Runtime.InteropServices.SafeHandle' +dotnet_diagnostic.CA1419.severity = warning + +# CA1501: Avoid excessive inheritance +dotnet_diagnostic.CA1501.severity = none + +# CA1502: Avoid excessive complexity +dotnet_diagnostic.CA1502.severity = none + +# CA1505: Avoid unmaintainable code +dotnet_diagnostic.CA1505.severity = none + +# CA1506: Avoid excessive class coupling +dotnet_diagnostic.CA1506.severity = none + +# CA1507: Use nameof to express symbol names +dotnet_diagnostic.CA1507.severity = warning + +# CA1508: Avoid dead conditional code +dotnet_diagnostic.CA1508.severity = none + +# CA1509: Invalid entry in code metrics rule specification file +dotnet_diagnostic.CA1509.severity = none + +# CA1700: Do not name enum values 'Reserved' +dotnet_diagnostic.CA1700.severity = none + +# CA1707: Identifiers should not contain underscores +dotnet_diagnostic.CA1707.severity = none + +# CA1708: Identifiers should differ by more than case +dotnet_diagnostic.CA1708.severity = none + +# CA1710: Identifiers should have correct suffix +dotnet_diagnostic.CA1710.severity = none + +# CA1711: Identifiers should not have incorrect suffix +dotnet_diagnostic.CA1711.severity = none + +# CA1712: Do not prefix enum values with type name +dotnet_diagnostic.CA1712.severity = none + +# CA1713: Events should not have 'Before' or 'After' prefix +dotnet_diagnostic.CA1713.severity = none + +# CA1715: Identifiers should have correct prefix +dotnet_diagnostic.CA1715.severity = none + +# CA1716: Identifiers should not match keywords +dotnet_diagnostic.CA1716.severity = none + +# CA1720: Identifier contains type name +dotnet_diagnostic.CA1720.severity = none + +# CA1721: Property names should not match get methods +dotnet_diagnostic.CA1721.severity = none + +# CA1724: Type names should not match namespaces +dotnet_diagnostic.CA1724.severity = none + +# CA1725: Parameter names should match base declaration +dotnet_diagnostic.CA1725.severity = suggestion + +# CA1727: Use PascalCase for named placeholders +dotnet_diagnostic.CA1727.severity = suggestion + +# CA1802: Use literals where appropriate +dotnet_diagnostic.CA1802.severity = warning +dotnet_code_quality.CA1802.api_surface = private, internal + +# CA1805: Do not initialize unnecessarily +dotnet_diagnostic.CA1805.severity = warning + +# CA1806: Do not ignore method results +dotnet_diagnostic.CA1806.severity = none + +# CA1810: Initialize reference type static fields inline +dotnet_diagnostic.CA1810.severity = warning + +# CA1812: Avoid uninstantiated internal classes +dotnet_diagnostic.CA1812.severity = none + +# CA1813: Avoid unsealed attributes +dotnet_diagnostic.CA1813.severity = none + +# CA1814: Prefer jagged arrays over multidimensional +dotnet_diagnostic.CA1814.severity = none + +# CA1815: Override equals and operator equals on value types +dotnet_diagnostic.CA1815.severity = none + +# CA1816: Dispose methods should call SuppressFinalize +dotnet_diagnostic.CA1816.severity = none + +# CA1819: Properties should not return arrays +dotnet_diagnostic.CA1819.severity = none + +# CA1820: Test for empty strings using string length +dotnet_diagnostic.CA1820.severity = none + +# CA1821: Remove empty Finalizers +dotnet_diagnostic.CA1821.severity = warning + +# CA1822: Mark members as static +dotnet_diagnostic.CA1822.severity = none + +# CA1823: Avoid unused private fields +dotnet_diagnostic.CA1823.severity = warning + +# CA1824: Mark assemblies with NeutralResourcesLanguageAttribute +dotnet_diagnostic.CA1824.severity = warning + +# CA1825: Avoid zero-length array allocations +dotnet_diagnostic.CA1825.severity = warning + +# CA1826: Do not use Enumerable methods on indexable collections +dotnet_diagnostic.CA1826.severity = warning + +# CA1827: Do not use Count() or LongCount() when Any() can be used +dotnet_diagnostic.CA1827.severity = warning + +# CA1828: Do not use CountAsync() or LongCountAsync() when AnyAsync() can be used +dotnet_diagnostic.CA1828.severity = warning + +# CA1829: Use Length/Count property instead of Count() when available +dotnet_diagnostic.CA1829.severity = warning + +# CA1830: Prefer strongly-typed Append and Insert method overloads on StringBuilder +dotnet_diagnostic.CA1830.severity = warning + +# CA1831: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1831.severity = warning + +# CA1832: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1832.severity = warning + +# CA1833: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1833.severity = warning + +# CA1834: Consider using 'StringBuilder.Append(char)' when applicable +dotnet_diagnostic.CA1834.severity = warning + +# CA1835: Prefer the 'Memory'-based overloads for 'ReadAsync' and 'WriteAsync' +dotnet_diagnostic.CA1835.severity = warning + +# CA1836: Prefer IsEmpty over Count +dotnet_diagnostic.CA1836.severity = warning + +# CA1837: Use 'Environment.ProcessId' +dotnet_diagnostic.CA1837.severity = warning + +# CA1838: Avoid 'StringBuilder' parameters for P/Invokes +dotnet_diagnostic.CA1838.severity = warning + +# CA1839: Use 'Environment.ProcessPath' +dotnet_diagnostic.CA1839.severity = warning + +# CA1840: Use 'Environment.CurrentManagedThreadId' +dotnet_diagnostic.CA1840.severity = warning + +# CA1841: Prefer Dictionary.Contains methods +dotnet_diagnostic.CA1841.severity = warning + +# CA1842: Do not use 'WhenAll' with a single task +dotnet_diagnostic.CA1842.severity = warning + +# CA1843: Do not use 'WaitAll' with a single task +dotnet_diagnostic.CA1843.severity = warning + +# CA1844: Provide memory-based overrides of async methods when subclassing 'Stream' +dotnet_diagnostic.CA1844.severity = warning + +# CA1845: Use span-based 'string.Concat' +dotnet_diagnostic.CA1845.severity = warning + +# CA1846: Prefer 'AsSpan' over 'Substring' +dotnet_diagnostic.CA1846.severity = warning + +# CA1847: Use char literal for a single character lookup +dotnet_diagnostic.CA1847.severity = warning + +# CA1848: Use the LoggerMessage delegates +dotnet_diagnostic.CA1848.severity = none + +# CA1849: Call async methods when in an async method +dotnet_diagnostic.CA1849.severity = suggestion + +# CA1850: Prefer static 'HashData' method over 'ComputeHash' +dotnet_diagnostic.CA1850.severity = warning + +# CA2000: Dispose objects before losing scope +dotnet_diagnostic.CA2000.severity = none + +# CA2002: Do not lock on objects with weak identity +dotnet_diagnostic.CA2002.severity = none + +# CA2007: Consider calling ConfigureAwait on the awaited task +dotnet_diagnostic.CA2007.severity = warning + +# CA2008: Do not create tasks without passing a TaskScheduler +dotnet_diagnostic.CA2008.severity = warning + +# CA2009: Do not call ToImmutableCollection on an ImmutableCollection value +dotnet_diagnostic.CA2009.severity = warning + +# CA2011: Avoid infinite recursion +dotnet_diagnostic.CA2011.severity = warning + +# CA2012: Use ValueTasks correctly +dotnet_diagnostic.CA2012.severity = warning + +# CA2013: Do not use ReferenceEquals with value types +dotnet_diagnostic.CA2013.severity = warning + +# CA2014: Do not use stackalloc in loops +dotnet_diagnostic.CA2014.severity = warning + +# CA2015: Do not define finalizers for types derived from MemoryManager +dotnet_diagnostic.CA2015.severity = warning + +# CA2016: Forward the 'CancellationToken' parameter to methods +dotnet_diagnostic.CA2016.severity = warning + +# CA2017: Parameter count mismatch +dotnet_diagnostic.CA2017.severity = warning + +# CA2018: 'Buffer.BlockCopy' expects the number of bytes to be copied for the 'count' argument +dotnet_diagnostic.CA2018.severity = warning + +# CA2100: Review SQL queries for security vulnerabilities +dotnet_diagnostic.CA2100.severity = none + +# CA2101: Specify marshaling for P/Invoke string arguments +dotnet_diagnostic.CA2101.severity = none + +# CA2109: Review visible event handlers +dotnet_diagnostic.CA2109.severity = none + +# CA2119: Seal methods that satisfy private interfaces +dotnet_diagnostic.CA2119.severity = none + +# CA2153: Do Not Catch Corrupted State Exceptions +dotnet_diagnostic.CA2153.severity = none + +# CA2200: Rethrow to preserve stack details +dotnet_diagnostic.CA2200.severity = warning + +# CA2201: Do not raise reserved exception types +dotnet_diagnostic.CA2201.severity = none + +# CA2207: Initialize value type static fields inline +dotnet_diagnostic.CA2207.severity = warning + +# CA2208: Instantiate argument exceptions correctly +dotnet_diagnostic.CA2208.severity = warning +dotnet_code_quality.CA2208.api_surface = public + +# CA2211: Non-constant fields should not be visible +dotnet_diagnostic.CA2211.severity = none + +# CA2213: Disposable fields should be disposed +dotnet_diagnostic.CA2213.severity = none + +# CA2214: Do not call overridable methods in constructors +dotnet_diagnostic.CA2214.severity = none + +# CA2215: Dispose methods should call base class dispose +dotnet_diagnostic.CA2215.severity = none + +# CA2216: Disposable types should declare finalizer +dotnet_diagnostic.CA2216.severity = none + +# CA2217: Do not mark enums with FlagsAttribute +dotnet_diagnostic.CA2217.severity = none + +# CA2218: Override GetHashCode on overriding Equals +dotnet_diagnostic.CA2218.severity = none + +# CA2219: Do not raise exceptions in finally clauses +dotnet_diagnostic.CA2219.severity = none + +# CA2224: Override Equals on overloading operator equals +dotnet_diagnostic.CA2224.severity = none + +# CA2225: Operator overloads have named alternates +dotnet_diagnostic.CA2225.severity = none + +# CA2226: Operators should have symmetrical overloads +dotnet_diagnostic.CA2226.severity = none + +# CA2227: Collection properties should be read only +dotnet_diagnostic.CA2227.severity = none + +# CA2229: Implement serialization constructors +dotnet_diagnostic.CA2229.severity = warning + +# CA2231: Overload operator equals on overriding value type Equals +dotnet_diagnostic.CA2231.severity = none + +# CA2234: Pass system uri objects instead of strings +dotnet_diagnostic.CA2234.severity = none + +# CA2235: Mark all non-serializable fields +dotnet_diagnostic.CA2235.severity = none + +# CA2237: Mark ISerializable types with serializable +dotnet_diagnostic.CA2237.severity = none + +# CA2241: Provide correct arguments to formatting methods +dotnet_diagnostic.CA2241.severity = warning + +# CA2242: Test for NaN correctly +dotnet_diagnostic.CA2242.severity = warning + +# CA2243: Attribute string literals should parse correctly +dotnet_diagnostic.CA2243.severity = warning + +# CA2244: Do not duplicate indexed element initializations +dotnet_diagnostic.CA2244.severity = warning + +# CA2245: Do not assign a property to itself +dotnet_diagnostic.CA2245.severity = warning + +# CA2246: Assigning symbol and its member in the same statement +dotnet_diagnostic.CA2246.severity = warning + +# CA2247: Argument passed to TaskCompletionSource constructor should be TaskCreationOptions enum instead of TaskContinuationOptions enum +dotnet_diagnostic.CA2247.severity = warning + +# CA2248: Provide correct 'enum' argument to 'Enum.HasFlag' +dotnet_diagnostic.CA2248.severity = warning + +# CA2249: Consider using 'string.Contains' instead of 'string.IndexOf' +dotnet_diagnostic.CA2249.severity = warning + +# CA2250: Use 'ThrowIfCancellationRequested' +dotnet_diagnostic.CA2250.severity = warning + +# CA2251: Use 'string.Equals' +dotnet_diagnostic.CA2251.severity = warning + +# CA2252: This API requires opting into preview features +dotnet_diagnostic.CA2252.severity = error + +# CA2253: Named placeholders should not be numeric values +dotnet_diagnostic.CA2253.severity = none + +# CA2254: Template should be a static expression +dotnet_diagnostic.CA2254.severity = none + +# CA2255: The 'ModuleInitializer' attribute should not be used in libraries +dotnet_diagnostic.CA2255.severity = warning + +# CA2256: All members declared in parent interfaces must have an implementation in a DynamicInterfaceCastableImplementation-attributed interface +dotnet_diagnostic.CA2256.severity = warning + +# CA2257: Members defined on an interface with the 'DynamicInterfaceCastableImplementationAttribute' should be 'static' +dotnet_diagnostic.CA2257.severity = warning + +# CA2258: Providing a 'DynamicInterfaceCastableImplementation' interface in Visual Basic is unsupported +dotnet_diagnostic.CA2258.severity = warning + +# CA2300: Do not use insecure deserializer BinaryFormatter +dotnet_diagnostic.CA2300.severity = none + +# CA2301: Do not call BinaryFormatter.Deserialize without first setting BinaryFormatter.Binder +dotnet_diagnostic.CA2301.severity = none + +# CA2302: Ensure BinaryFormatter.Binder is set before calling BinaryFormatter.Deserialize +dotnet_diagnostic.CA2302.severity = none + +# CA2305: Do not use insecure deserializer LosFormatter +dotnet_diagnostic.CA2305.severity = none + +# CA2310: Do not use insecure deserializer NetDataContractSerializer +dotnet_diagnostic.CA2310.severity = none + +# CA2311: Do not deserialize without first setting NetDataContractSerializer.Binder +dotnet_diagnostic.CA2311.severity = none + +# CA2312: Ensure NetDataContractSerializer.Binder is set before deserializing +dotnet_diagnostic.CA2312.severity = none + +# CA2315: Do not use insecure deserializer ObjectStateFormatter +dotnet_diagnostic.CA2315.severity = none + +# CA2321: Do not deserialize with JavaScriptSerializer using a SimpleTypeResolver +dotnet_diagnostic.CA2321.severity = none + +# CA2322: Ensure JavaScriptSerializer is not initialized with SimpleTypeResolver before deserializing +dotnet_diagnostic.CA2322.severity = none + +# CA2326: Do not use TypeNameHandling values other than None +dotnet_diagnostic.CA2326.severity = none + +# CA2327: Do not use insecure JsonSerializerSettings +dotnet_diagnostic.CA2327.severity = none + +# CA2328: Ensure that JsonSerializerSettings are secure +dotnet_diagnostic.CA2328.severity = none + +# CA2329: Do not deserialize with JsonSerializer using an insecure configuration +dotnet_diagnostic.CA2329.severity = none + +# CA2330: Ensure that JsonSerializer has a secure configuration when deserializing +dotnet_diagnostic.CA2330.severity = none + +# CA2350: Do not use DataTable.ReadXml() with untrusted data +dotnet_diagnostic.CA2350.severity = none + +# CA2351: Do not use DataSet.ReadXml() with untrusted data +dotnet_diagnostic.CA2351.severity = none + +# CA2352: Unsafe DataSet or DataTable in serializable type can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2352.severity = none + +# CA2353: Unsafe DataSet or DataTable in serializable type +dotnet_diagnostic.CA2353.severity = none + +# CA2354: Unsafe DataSet or DataTable in deserialized object graph can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2354.severity = none + +# CA2355: Unsafe DataSet or DataTable type found in deserializable object graph +dotnet_diagnostic.CA2355.severity = none + +# CA2356: Unsafe DataSet or DataTable type in web deserializable object graph +dotnet_diagnostic.CA2356.severity = none + +# CA2361: Ensure auto-generated class containing DataSet.ReadXml() is not used with untrusted data +dotnet_diagnostic.CA2361.severity = none + +# CA2362: Unsafe DataSet or DataTable in auto-generated serializable type can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2362.severity = none + +# CA3001: Review code for SQL injection vulnerabilities +dotnet_diagnostic.CA3001.severity = none + +# CA3002: Review code for XSS vulnerabilities +dotnet_diagnostic.CA3002.severity = none + +# CA3003: Review code for file path injection vulnerabilities +dotnet_diagnostic.CA3003.severity = none + +# CA3004: Review code for information disclosure vulnerabilities +dotnet_diagnostic.CA3004.severity = none + +# CA3005: Review code for LDAP injection vulnerabilities +dotnet_diagnostic.CA3005.severity = none + +# CA3006: Review code for process command injection vulnerabilities +dotnet_diagnostic.CA3006.severity = none + +# CA3007: Review code for open redirect vulnerabilities +dotnet_diagnostic.CA3007.severity = none + +# CA3008: Review code for XPath injection vulnerabilities +dotnet_diagnostic.CA3008.severity = none + +# CA3009: Review code for XML injection vulnerabilities +dotnet_diagnostic.CA3009.severity = none + +# CA3010: Review code for XAML injection vulnerabilities +dotnet_diagnostic.CA3010.severity = none + +# CA3011: Review code for DLL injection vulnerabilities +dotnet_diagnostic.CA3011.severity = none + +# CA3012: Review code for regex injection vulnerabilities +dotnet_diagnostic.CA3012.severity = none + +# CA3061: Do Not Add Schema By URL +dotnet_diagnostic.CA3061.severity = warning + +# CA3075: Insecure DTD processing in XML +dotnet_diagnostic.CA3075.severity = warning + +# CA3076: Insecure XSLT script processing. +dotnet_diagnostic.CA3076.severity = warning + +# CA3077: Insecure Processing in API Design, XmlDocument and XmlTextReader +dotnet_diagnostic.CA3077.severity = warning + +# CA3147: Mark Verb Handlers With Validate Antiforgery Token +dotnet_diagnostic.CA3147.severity = warning + +# CA5350: Do Not Use Weak Cryptographic Algorithms +dotnet_diagnostic.CA5350.severity = warning + +# CA5351: Do Not Use Broken Cryptographic Algorithms +dotnet_diagnostic.CA5351.severity = warning + +# CA5358: Review cipher mode usage with cryptography experts +dotnet_diagnostic.CA5358.severity = none + +# CA5359: Do Not Disable Certificate Validation +dotnet_diagnostic.CA5359.severity = warning + +# CA5360: Do Not Call Dangerous Methods In Deserialization +dotnet_diagnostic.CA5360.severity = warning + +# CA5361: Do Not Disable SChannel Use of Strong Crypto +dotnet_diagnostic.CA5361.severity = warning + +# CA5362: Potential reference cycle in deserialized object graph +dotnet_diagnostic.CA5362.severity = none + +# CA5363: Do Not Disable Request Validation +dotnet_diagnostic.CA5363.severity = warning + +# CA5364: Do Not Use Deprecated Security Protocols +dotnet_diagnostic.CA5364.severity = warning + +# CA5365: Do Not Disable HTTP Header Checking +dotnet_diagnostic.CA5365.severity = warning + +# CA5366: Use XmlReader for 'DataSet.ReadXml()' +dotnet_diagnostic.CA5366.severity = none + +# CA5367: Do Not Serialize Types With Pointer Fields +dotnet_diagnostic.CA5367.severity = none + +# CA5368: Set ViewStateUserKey For Classes Derived From Page +dotnet_diagnostic.CA5368.severity = warning + +# CA5369: Use XmlReader for 'XmlSerializer.Deserialize()' +dotnet_diagnostic.CA5369.severity = none + +# CA5370: Use XmlReader for XmlValidatingReader constructor +dotnet_diagnostic.CA5370.severity = warning + +# CA5371: Use XmlReader for 'XmlSchema.Read()' +dotnet_diagnostic.CA5371.severity = none + +# CA5372: Use XmlReader for XPathDocument constructor +dotnet_diagnostic.CA5372.severity = none + +# CA5373: Do not use obsolete key derivation function +dotnet_diagnostic.CA5373.severity = warning + +# CA5374: Do Not Use XslTransform +dotnet_diagnostic.CA5374.severity = warning + +# CA5375: Do Not Use Account Shared Access Signature +dotnet_diagnostic.CA5375.severity = none + +# CA5376: Use SharedAccessProtocol HttpsOnly +dotnet_diagnostic.CA5376.severity = warning + +# CA5377: Use Container Level Access Policy +dotnet_diagnostic.CA5377.severity = warning + +# CA5378: Do not disable ServicePointManagerSecurityProtocols +dotnet_diagnostic.CA5378.severity = warning + +# CA5379: Ensure Key Derivation Function algorithm is sufficiently strong +dotnet_diagnostic.CA5379.severity = warning + +# CA5380: Do Not Add Certificates To Root Store +dotnet_diagnostic.CA5380.severity = warning + +# CA5381: Ensure Certificates Are Not Added To Root Store +dotnet_diagnostic.CA5381.severity = warning + +# CA5382: Use Secure Cookies In ASP.NET Core +dotnet_diagnostic.CA5382.severity = none + +# CA5383: Ensure Use Secure Cookies In ASP.NET Core +dotnet_diagnostic.CA5383.severity = none + +# CA5384: Do Not Use Digital Signature Algorithm (DSA) +dotnet_diagnostic.CA5384.severity = warning + +# CA5385: Use RivestīŋŊShamirīŋŊAdleman (RSA) Algorithm With Sufficient Key Size +dotnet_diagnostic.CA5385.severity = warning + +# CA5386: Avoid hardcoding SecurityProtocolType value +dotnet_diagnostic.CA5386.severity = none + +# CA5387: Do Not Use Weak Key Derivation Function With Insufficient Iteration Count +dotnet_diagnostic.CA5387.severity = none + +# CA5388: Ensure Sufficient Iteration Count When Using Weak Key Derivation Function +dotnet_diagnostic.CA5388.severity = none + +# CA5389: Do Not Add Archive Item's Path To The Target File System Path +dotnet_diagnostic.CA5389.severity = none + +# CA5390: Do not hard-code encryption key +dotnet_diagnostic.CA5390.severity = none + +# CA5391: Use antiforgery tokens in ASP.NET Core MVC controllers +dotnet_diagnostic.CA5391.severity = none + +# CA5392: Use DefaultDllImportSearchPaths attribute for P/Invokes +dotnet_diagnostic.CA5392.severity = none + +# CA5393: Do not use unsafe DllImportSearchPath value +dotnet_diagnostic.CA5393.severity = none + +# CA5394: Do not use insecure randomness +dotnet_diagnostic.CA5394.severity = none + +# CA5395: Miss HttpVerb attribute for action methods +dotnet_diagnostic.CA5395.severity = none + +# CA5396: Set HttpOnly to true for HttpCookie +dotnet_diagnostic.CA5396.severity = none + +# CA5397: Do not use deprecated SslProtocols values +dotnet_diagnostic.CA5397.severity = none + +# CA5398: Avoid hardcoded SslProtocols values +dotnet_diagnostic.CA5398.severity = none + +# CA5399: HttpClients should enable certificate revocation list checks +dotnet_diagnostic.CA5399.severity = none + +# CA5400: Ensure HttpClient certificate revocation list check is not disabled +dotnet_diagnostic.CA5400.severity = none + +# CA5401: Do not use CreateEncryptor with non-default IV +dotnet_diagnostic.CA5401.severity = none + +# CA5402: Use CreateEncryptor with the default IV +dotnet_diagnostic.CA5402.severity = none + +# CA5403: Do not hard-code certificate +dotnet_diagnostic.CA5403.severity = none + +# CA5404: Do not disable token validation checks +dotnet_diagnostic.CA5404.severity = none + +# CA5405: Do not always skip token validation in delegates +dotnet_diagnostic.CA5405.severity = none + +# IL3000: Avoid using accessing Assembly file path when publishing as a single-file +dotnet_diagnostic.IL3000.severity = warning + +# IL3001: Avoid using accessing Assembly file path when publishing as a single-file +dotnet_diagnostic.IL3001.severity = warning + +# IL3002: Using member with RequiresAssemblyFilesAttribute can break functionality when embedded in a single-file app +dotnet_diagnostic.IL3002.severity = warning + +# SA0001: XML comments +dotnet_diagnostic.SA0001.severity = none + +# SA1000: Spacing around keywords +dotnet_diagnostic.SA1000.severity = warning + +# SA1001: Commas should not be preceded by whitespace +dotnet_diagnostic.SA1001.severity = warning + +# SA1002: Semicolons should not be preceded by a space +dotnet_diagnostic.SA1002.severity = none + +# SA1003: Operator should not appear at the end of a line +dotnet_diagnostic.SA1003.severity = none + +# SA1004: Documentation line should begin with a space +dotnet_diagnostic.SA1004.severity = none + +# SA1005: Single line comment should begin with a space +dotnet_diagnostic.SA1005.severity = none + +# SA1008: Opening parenthesis should not be preceded by a space +dotnet_diagnostic.SA1008.severity = none + +# SA1009: Closing parenthesis should not be followed by a space +dotnet_diagnostic.SA1009.severity = none + +# SA1010: Opening square brackets should not be preceded by a space +dotnet_diagnostic.SA1010.severity = none + +# SA1011: Closing square bracket should be followed by a space +dotnet_diagnostic.SA1011.severity = none + +# SA1012: Opening brace should be followed by a space +dotnet_diagnostic.SA1012.severity = none + +# SA1013: Closing brace should be preceded by a space +dotnet_diagnostic.SA1013.severity = none + +# SA1014: Opening generic brackets should not be preceded by a space +dotnet_diagnostic.SA1014.severity = warning + +# SA1015: Closing generic bracket should not be followed by a space +dotnet_diagnostic.SA1015.severity = none + +# SA1018: Nullable type symbol should not be preceded by a space +dotnet_diagnostic.SA1018.severity = warning + +# SA1020: Increment symbol should not be preceded by a space +dotnet_diagnostic.SA1020.severity = warning + +# SA1021: Negative sign should be preceded by a space +dotnet_diagnostic.SA1021.severity = none + +# SA1023: Dereference symbol '*' should not be preceded by a space." +dotnet_diagnostic.SA1023.severity = none + +# SA1024: Colon should be followed by a space +dotnet_diagnostic.SA1024.severity = none + +# SA1025: Code should not contain multiple whitespace characters in a row +dotnet_diagnostic.SA1025.severity = none + +# SA1026: Keyword followed by span or blank line +dotnet_diagnostic.SA1026.severity = warning + +# SA1027: Tabs and spaces should be used correctly +dotnet_diagnostic.SA1027.severity = warning + +# SA1028: Code should not contain trailing whitespace +dotnet_diagnostic.SA1028.severity = warning + +# SA1100: Do not prefix calls with base unless local implementation exists +dotnet_diagnostic.SA1100.severity = none + +# SA1101: Prefix local calls with this +dotnet_diagnostic.SA1101.severity = none + +# SA1102: Query clause should follow previous clause +dotnet_diagnostic.SA1102.severity = warning + +# SA1105: Query clauses spanning multiple lines should begin on own line +dotnet_diagnostic.SA1105.severity = warning + +# SA1106: Code should not contain empty statements +dotnet_diagnostic.SA1106.severity = none + +# SA1107: Code should not contain multiple statements on one line +dotnet_diagnostic.SA1107.severity = none + +# SA1108: Block statements should not contain embedded comments +dotnet_diagnostic.SA1108.severity = none + +# SA1110: Opening parenthesis or bracket should be on declaration line +dotnet_diagnostic.SA1110.severity = none + +# SA1111: Closing parenthesis should be on line of last parameter +dotnet_diagnostic.SA1111.severity = none + +# SA1113: Comma should be on the same line as previous parameter +dotnet_diagnostic.SA1113.severity = warning + +# SA1114: Parameter list should follow declaration +dotnet_diagnostic.SA1114.severity = none + +# SA1115: Parameter should begin on the line after the previous parameter +dotnet_diagnostic.SA1115.severity = warning + +# SA1116: Split parameters should start on line after declaration +dotnet_diagnostic.SA1116.severity = none + +# SA1117: Parameters should be on same line or separate lines +dotnet_diagnostic.SA1117.severity = none + +# SA1118: Parameter should not span multiple lines +dotnet_diagnostic.SA1118.severity = none + +# SA1119: Statement should not use unnecessary parenthesis +dotnet_diagnostic.SA1119.severity = none + +# SA1120: Comments should contain text +dotnet_diagnostic.SA1120.severity = none + +# SA1121: Use built-in type alias +dotnet_diagnostic.SA1121.severity = warning + +# SA1122: Use string.Empty for empty strings +dotnet_diagnostic.SA1122.severity = none + +# SA1123: Region should not be located within a code element +dotnet_diagnostic.SA1123.severity = none + +# SA1124: Do not use regions +dotnet_diagnostic.SA1124.severity = none + +# SA1125: Use shorthand for nullable types +dotnet_diagnostic.SA1125.severity = none + +# SA1127: Generic type constraints should be on their own line +dotnet_diagnostic.SA1127.severity = none + +# SA1128: Put constructor initializers on their own line +dotnet_diagnostic.SA1128.severity = none + +# SA1129: Do not use default value type constructor +dotnet_diagnostic.SA1129.severity = warning + +# SA1130: Use lambda syntax +dotnet_diagnostic.SA1130.severity = none + +# SA1131: Constant values should appear on the right-hand side of comparisons +dotnet_diagnostic.SA1131.severity = none + +# SA1132: Do not combine fields +dotnet_diagnostic.SA1132.severity = none + +# SA1133: Do not combine attributes +dotnet_diagnostic.SA1133.severity = none + +# SA1134: Each attribute should be placed on its own line of code +dotnet_diagnostic.SA1134.severity = none + +# SA1135: Using directive should be qualified +dotnet_diagnostic.SA1135.severity = none + +# SA1136: Enum values should be on separate lines +dotnet_diagnostic.SA1136.severity = none + +# SA1137: Elements should have the same indentation +dotnet_diagnostic.SA1137.severity = none + +# SA1139: Use literal suffix notation instead of casting +dotnet_diagnostic.SA1139.severity = none + +# SA1141: Use tuple syntax +dotnet_diagnostic.SA1141.severity = warning + +# SA1142: Refer to tuple elements by name +dotnet_diagnostic.SA1142.severity = warning + +# SA1200: Using directive should appear within a namespace declaration +dotnet_diagnostic.SA1200.severity = none + +# SA1201: Elements should appear in the correct order +dotnet_diagnostic.SA1201.severity = none + +# SA1202: Elements should be ordered by access +dotnet_diagnostic.SA1202.severity = none + +# SA1203: Constants should appear before fields +dotnet_diagnostic.SA1203.severity = none + +# SA1204: Static elements should appear before instance elements +dotnet_diagnostic.SA1204.severity = none + +# SA1205: Partial elements should declare an access modifier +dotnet_diagnostic.SA1205.severity = warning + +# SA1206: Keyword ordering +dotnet_diagnostic.SA1206.severity = warning + +# SA1208: Using directive ordering +dotnet_diagnostic.SA1208.severity = none + +# SA1209: Using alias directives should be placed after all using namespace directives +dotnet_diagnostic.SA1209.severity = none + +# SA1210: Using directives should be ordered alphabetically by the namespaces +dotnet_diagnostic.SA1210.severity = none + +# SA1211: Using alias directive ordering +dotnet_diagnostic.SA1211.severity = none + +# SA1212: A get accessor appears after a set accessor within a property or indexer +dotnet_diagnostic.SA1212.severity = warning + +# SA1214: Readonly fields should appear before non-readonly fields +dotnet_diagnostic.SA1214.severity = none + +# SA1216: Using static directives should be placed at the correct location +dotnet_diagnostic.SA1216.severity = none + +# SA1300: Element should begin with an uppercase letter +dotnet_diagnostic.SA1300.severity = none + +# SA1302: Interface names should begin with I +dotnet_diagnostic.SA1302.severity = warning + +# SA1303: Const field names should begin with upper-case letter +dotnet_diagnostic.SA1303.severity = none + +# SA1304: Non-private readonly fields should begin with upper-case letter +dotnet_diagnostic.SA1304.severity = none + +# SA1306: Field should begin with lower-case letter +dotnet_diagnostic.SA1306.severity = none + +# SA1307: Field should begin with upper-case letter +dotnet_diagnostic.SA1307.severity = none + +# SA1308: Field should not begin with the prefix 's_' +dotnet_diagnostic.SA1308.severity = none + +# SA1309: Field names should not begin with underscore +dotnet_diagnostic.SA1309.severity = none + +# SA1310: Field should not contain an underscore +dotnet_diagnostic.SA1310.severity = none + +# SA1311: Static readonly fields should begin with upper-case letter +dotnet_diagnostic.SA1311.severity = none + +# SA1312: Variable should begin with lower-case letter +dotnet_diagnostic.SA1312.severity = none + +# SA1313: Parameter should begin with lower-case letter +dotnet_diagnostic.SA1313.severity = none + +# SA1314: Type parameter names should begin with T +dotnet_diagnostic.SA1314.severity = none + +# SA1316: Tuple element names should use correct casing +dotnet_diagnostic.SA1316.severity = none + +# SA1400: Member should declare an access modifier +dotnet_diagnostic.SA1400.severity = warning + +# SA1401: Fields should be private +dotnet_diagnostic.SA1401.severity = none + +# SA1402: File may only contain a single type +dotnet_diagnostic.SA1402.severity = none + +# SA1403: File may only contain a single namespace +dotnet_diagnostic.SA1403.severity = none + +# SA1404: Code analysis suppression should have justification +dotnet_diagnostic.SA1404.severity = warning + +# SA1405: Debug.Assert should provide message text +dotnet_diagnostic.SA1405.severity = none + +# SA1407: Arithmetic expressions should declare precedence +dotnet_diagnostic.SA1407.severity = none + +# SA1408: Conditional expressions should declare precedence +dotnet_diagnostic.SA1408.severity = none + +# SA1410: Remove delegate parentheses when possible +dotnet_diagnostic.SA1410.severity = warning + +# SA1411: Attribute constructor shouldn't use unnecessary parenthesis +dotnet_diagnostic.SA1411.severity = warning + +# SA1413: Use trailing comma in multi-line initializers +dotnet_diagnostic.SA1413.severity = none + +# SA1414: Tuple types in signatures should have element names +dotnet_diagnostic.SA1414.severity = none + +# SA1500: Braces for multi-line statements should not share line +dotnet_diagnostic.SA1500.severity = none + +# SA1501: Statement should not be on a single line +dotnet_diagnostic.SA1501.severity = none + +# SA1502: Element should not be on a single line +dotnet_diagnostic.SA1502.severity = none + +# SA1503: Braces should not be omitted +dotnet_diagnostic.SA1503.severity = none + +# SA1504: All accessors should be single-line or multi-line +dotnet_diagnostic.SA1504.severity = none + +# SA1505: An opening brace should not be followed by a blank line +dotnet_diagnostic.SA1505.severity = none + +# SA1506: Element documentation headers should not be followed by blank line +dotnet_diagnostic.SA1506.severity = none + +# SA1507: Code should not contain multiple blank lines in a row +dotnet_diagnostic.SA1507.severity = none + +# SA1508: A closing brace should not be preceded by a blank line +dotnet_diagnostic.SA1508.severity = none + +# SA1509: Opening braces should not be preceded by blank line +dotnet_diagnostic.SA1509.severity = none + +# SA1510: 'else' statement should not be preceded by a blank line +dotnet_diagnostic.SA1510.severity = none + +# SA1512: Single-line comments should not be followed by blank line +dotnet_diagnostic.SA1512.severity = none + +# SA1513: Closing brace should be followed by blank line +dotnet_diagnostic.SA1513.severity = none + +# SA1514: Element documentation header should be preceded by blank line +dotnet_diagnostic.SA1514.severity = none + +# SA1515: Single-line comment should be preceded by blank line +dotnet_diagnostic.SA1515.severity = none + +# SA1516: Elements should be separated by blank line +dotnet_diagnostic.SA1516.severity = none + +# SA1517: Code should not contain blank lines at start of file +dotnet_diagnostic.SA1517.severity = warning + +# SA1518: Code should not contain blank lines at the end of the file +dotnet_diagnostic.SA1518.severity = warning + +# SA1519: Braces should not be omitted from multi-line child statement +dotnet_diagnostic.SA1519.severity = none + +# SA1520: Use braces consistently +dotnet_diagnostic.SA1520.severity = none + +# SA1600: Elements should be documented +dotnet_diagnostic.SA1600.severity = none + +# SA1601: Partial elements should be documented +dotnet_diagnostic.SA1601.severity = none + +# SA1602: Enumeration items should be documented +dotnet_diagnostic.SA1602.severity = none + +# SA1604: Element documentation should have summary +dotnet_diagnostic.SA1604.severity = none + +# SA1605: Partial element documentation should have summary +dotnet_diagnostic.SA1605.severity = none + +# SA1606: Element documentation should have summary text +dotnet_diagnostic.SA1606.severity = none + +# SA1608: Element documentation should not have default summary +dotnet_diagnostic.SA1608.severity = none + +# SA1610: Property documentation should have value text +dotnet_diagnostic.SA1610.severity = none + +# SA1611: The documentation for parameter 'message' is missing +dotnet_diagnostic.SA1611.severity = none + +# SA1612: The parameter documentation is at incorrect position +dotnet_diagnostic.SA1612.severity = none + +# SA1614: Element parameter documentation should have text +dotnet_diagnostic.SA1614.severity = none + +# SA1615: Element return value should be documented +dotnet_diagnostic.SA1615.severity = none + +# SA1616: Element return value documentation should have text +dotnet_diagnostic.SA1616.severity = none + +# SA1618: The documentation for type parameter is missing +dotnet_diagnostic.SA1618.severity = none + +# SA1619: The documentation for type parameter is missing +dotnet_diagnostic.SA1619.severity = none + +# SA1622: Generic type parameter documentation should have text +dotnet_diagnostic.SA1622.severity = none + +# SA1623: Property documentation text +dotnet_diagnostic.SA1623.severity = none + +# SA1624: Because the property only contains a visible get accessor, the documentation summary text should begin with 'Gets' +dotnet_diagnostic.SA1624.severity = none + +# SA1625: Element documentation should not be copied and pasted +dotnet_diagnostic.SA1625.severity = none + +# SA1626: Single-line comments should not use documentation style slashes +dotnet_diagnostic.SA1626.severity = none + +# SA1627: The documentation text within the \'exception\' tag should not be empty +dotnet_diagnostic.SA1627.severity = none + +# SA1629: Documentation text should end with a period +dotnet_diagnostic.SA1629.severity = none + +# SA1633: File should have header +dotnet_diagnostic.SA1633.severity = none + +# SA1642: Constructor summary documentation should begin with standard text +dotnet_diagnostic.SA1642.severity = none + +# SA1643: Destructor summary documentation should begin with standard text +dotnet_diagnostic.SA1643.severity = none + +# SA1649: File name should match first type name +dotnet_diagnostic.SA1649.severity = none + +# IDE0001: Simplify name +dotnet_diagnostic.IDE0001.severity = suggestion + +# IDE0002: Simplify member access +dotnet_diagnostic.IDE0002.severity = suggestion + +# IDE0003: Remove this or Me qualification +dotnet_diagnostic.IDE0003.severity = suggestion + +# IDE0004: Remove Unnecessary Cast +dotnet_diagnostic.IDE0004.severity = suggestion + +# IDE0005: Using directive is unnecessary. +dotnet_diagnostic.IDE0005.severity = suggestion + +# IDE0007: Use implicit type +dotnet_diagnostic.IDE0007.severity = silent + +# IDE0008: Use explicit type +dotnet_diagnostic.IDE0008.severity = suggestion + +# IDE0009: Add this or Me qualification +dotnet_diagnostic.IDE0009.severity = silent + +# IDE0010: Add missing cases +dotnet_diagnostic.IDE0010.severity = silent + +# IDE0011: Add braces +dotnet_diagnostic.IDE0011.severity = silent + +# IDE0016: Use 'throw' expression +dotnet_diagnostic.IDE0016.severity = silent + +# IDE0017: Simplify object initialization +dotnet_diagnostic.IDE0017.severity = suggestion + +# IDE0018: Inline variable declaration +dotnet_diagnostic.IDE0018.severity = suggestion + +# IDE0019: Use pattern matching to avoid as followed by a null check +dotnet_diagnostic.IDE0019.severity = suggestion + +# IDE0020: Use pattern matching to avoid is check followed by a cast (with variable) +dotnet_diagnostic.IDE0020.severity = suggestion + +# IDE0021: Use expression body for constructors +dotnet_diagnostic.IDE0021.severity = silent + +# IDE0022: Use expression body for methods +dotnet_diagnostic.IDE0022.severity = silent + +# IDE0023: Use expression body for operators +dotnet_diagnostic.IDE0023.severity = silent + +# IDE0024: Use expression body for operators +dotnet_diagnostic.IDE0024.severity = silent + +# IDE0025: Use expression body for properties +dotnet_diagnostic.IDE0025.severity = silent + +# IDE0026: Use expression body for indexers +dotnet_diagnostic.IDE0026.severity = silent + +# IDE0027: Use expression body for accessors +dotnet_diagnostic.IDE0027.severity = silent + +# IDE0028: Simplify collection initialization +dotnet_diagnostic.IDE0028.severity = suggestion + +# IDE0029: Use coalesce expression +dotnet_diagnostic.IDE0029.severity = suggestion + +# IDE0030: Use coalesce expression +dotnet_diagnostic.IDE0030.severity = suggestion + +# IDE0031: Use null propagation +dotnet_diagnostic.IDE0031.severity = silent + +# IDE0032: Use auto property +dotnet_diagnostic.IDE0032.severity = silent + +# IDE0033: Use explicitly provided tuple name +dotnet_diagnostic.IDE0033.severity = suggestion + +# IDE0034: Simplify 'default' expression +dotnet_diagnostic.IDE0034.severity = suggestion + +# IDE0035: Remove unreachable code +dotnet_diagnostic.IDE0035.severity = suggestion + +# IDE0036: Order modifiers +dotnet_diagnostic.IDE0036.severity = suggestion + +# IDE0037: Use inferred member name +dotnet_diagnostic.IDE0037.severity = silent + +# IDE0038: Use pattern matching to avoid is check followed by a cast (without variable) +dotnet_diagnostic.IDE0038.severity = suggestion + +# IDE0039: Use local function +dotnet_diagnostic.IDE0039.severity = suggestion + +# IDE0040: Add accessibility modifiers +dotnet_diagnostic.IDE0040.severity = suggestion + +# IDE0041: Use 'is null' check +dotnet_diagnostic.IDE0041.severity = warning + +# IDE0042: Deconstruct variable declaration +dotnet_diagnostic.IDE0042.severity = silent + +# IDE0043: Invalid format string +dotnet_diagnostic.IDE0043.severity = warning + +# IDE0044: Add readonly modifier +dotnet_diagnostic.IDE0044.severity = suggestion + +# IDE0045: Use conditional expression for assignment +dotnet_diagnostic.IDE0045.severity = suggestion + +# IDE0046: Use conditional expression for return +dotnet_diagnostic.IDE0046.severity = suggestion + +# IDE0047: Remove unnecessary parentheses +dotnet_diagnostic.IDE0047.severity = silent + +# IDE0048: Add parentheses for clarity +dotnet_diagnostic.IDE0048.severity = silent + +# IDE0049: Use language keywords instead of framework type names for type references +dotnet_diagnostic.IDE0049.severity = warning + +# IDE0050: Convert anonymous type to tuple +dotnet_diagnostic.IDE0050.severity = suggestion + +# IDE0051: Remove unused private members +dotnet_diagnostic.IDE0051.severity = suggestion + +# IDE0052: Remove unread private members +dotnet_diagnostic.IDE0052.severity = suggestion + +# IDE0053: Use expression body for lambdas +dotnet_diagnostic.IDE0053.severity = silent + +# IDE0054: Use compound assignment +dotnet_diagnostic.IDE0054.severity = suggestion + +# IDE0055: Fix formatting +dotnet_diagnostic.IDE0055.severity = suggestion + +# IDE0056: Use index operator +dotnet_diagnostic.IDE0056.severity = suggestion + +# IDE0057: Use range operator +dotnet_diagnostic.IDE0057.severity = suggestion + +# IDE0058: Expression value is never used +dotnet_diagnostic.IDE0058.severity = silent + +# IDE0059: Unnecessary assignment of a value +dotnet_diagnostic.IDE0059.severity = warning + +# IDE0060: Remove unused parameter +dotnet_diagnostic.IDE0060.severity = silent + +# IDE0061: Use expression body for local functions +dotnet_diagnostic.IDE0061.severity = silent + +# IDE0062: Make local function 'static' +dotnet_diagnostic.IDE0062.severity = warning + +# IDE0063: Use simple 'using' statement +dotnet_diagnostic.IDE0063.severity = silent + +# IDE0064: Make readonly fields writable +dotnet_diagnostic.IDE0064.severity = silent + +# IDE0065: Misplaced using directive +dotnet_diagnostic.IDE0065.severity = suggestion + +# IDE0066: Convert switch statement to expression +dotnet_diagnostic.IDE0066.severity = suggestion + +# IDE0070: Use 'System.HashCode' +dotnet_diagnostic.IDE0070.severity = suggestion + +# IDE0071: Simplify interpolation +dotnet_diagnostic.IDE0071.severity = suggestion + +# IDE0072: Add missing cases +dotnet_diagnostic.IDE0072.severity = silent + +# IDE0073: The file header is missing or not located at the top of the file +dotnet_diagnostic.IDE0073.severity = warning + +# IDE0074: Use compound assignment +dotnet_diagnostic.IDE0074.severity = suggestion + +# IDE0075: Simplify conditional expression +dotnet_diagnostic.IDE0075.severity = silent + +# IDE0076: Invalid global 'SuppressMessageAttribute' +dotnet_diagnostic.IDE0076.severity = warning + +# IDE0077: Avoid legacy format target in 'SuppressMessageAttribute' +dotnet_diagnostic.IDE0077.severity = silent + +# IDE0078: Use pattern matching +dotnet_diagnostic.IDE0078.severity = suggestion + +# IDE0079: Remove unnecessary suppression +dotnet_diagnostic.IDE0079.severity = suggestion + +# IDE0080: Remove unnecessary suppression operator +dotnet_diagnostic.IDE0080.severity = warning + +# IDE0081: Remove unnecessary suppression operator +dotnet_diagnostic.IDE0081.severity = none + +# IDE0082: 'typeof' can be converted to 'nameof' +dotnet_diagnostic.IDE0082.severity = warning + +# IDE0083: Use pattern matching +dotnet_diagnostic.IDE0083.severity = silent + +# IDE0084: Use pattern matching (IsNot operator) +dotnet_diagnostic.IDE0084.severity = none + +# IDE0090: Use 'new(...)' +dotnet_diagnostic.IDE0090.severity = silent + +# IDE0100: Remove redundant equality +dotnet_diagnostic.IDE0100.severity = suggestion + +# IDE0110: Remove unnecessary discard +dotnet_diagnostic.IDE0110.severity = suggestion + +# IDE0120: Simplify LINQ expression +dotnet_diagnostic.IDE0120.severity = none + +# IDE0130: Namespace does not match folder structure +dotnet_diagnostic.IDE0130.severity = silent + +# IDE0140: Simplify object creation +dotnet_diagnostic.IDE0140.severity = none + +# IDE0150: Prefer 'null' check over type check +dotnet_diagnostic.IDE0150.severity = silent + +# IDE0160: Convert to block scoped namespace +dotnet_diagnostic.IDE0160.severity = silent + +# IDE0161: Convert to file-scoped namespace +dotnet_diagnostic.IDE0161.severity = silent + +# IDE1005: Delegate invocation can be simplified. +dotnet_diagnostic.IDE1005.severity = suggestion + +# IDE1006: Naming styles +dotnet_diagnostic.IDE1006.severity = silent + +# IDE2000: Allow multiple blank lines +dotnet_diagnostic.IDE2000.severity = silent + +# IDE2001: Embedded statements must be on their own line +dotnet_diagnostic.IDE2001.severity = silent + +# IDE2002: Consecutive braces must not have blank line between them +dotnet_diagnostic.IDE2002.severity = silent + +# IDE2003: Allow statement immediately after block +dotnet_diagnostic.IDE2003.severity = silent + +# IDE2004: Blank line not allowed after constructor initializer colon +dotnet_diagnostic.IDE2004.severity = silent + +# !!! OVERRIDES +# !!! Note: It is preferred to minimize the overrides if possible (just to follow the MS dotnet convention as much as possible) + +# IDE0008: Use explicit type +dotnet_diagnostic.IDE0008.severity = none diff --git a/lang/csharp/CodeAnalysis.test.globalconfig b/lang/csharp/CodeAnalysis.test.globalconfig new file mode 100644 index 00000000000..ffb541fa360 --- /dev/null +++ b/lang/csharp/CodeAnalysis.test.globalconfig @@ -0,0 +1,1729 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# !!! Original: https://github.com/dotnet/runtime/blob/main/eng/CodeAnalysis.test.globalconfig +# !!! Any overrides should be added to the end of the file + +is_global = true + +# AD0001: Analyzer threw an exception +dotnet_diagnostic.AD0001.severity = none + +# BCL0001: Ensure minimum API surface is respected +dotnet_diagnostic.BCL0001.severity = none + +# BCL0010: AppContext default value expected to be true +dotnet_diagnostic.BCL0010.severity = none + +# BCL0011: AppContext default value defined in if statement with incorrect pattern +dotnet_diagnostic.BCL0011.severity = none + +# BCL0012: AppContext default value defined in if statement at root of switch case +dotnet_diagnostic.BCL0012.severity = none + +# BCL0015: Invalid P/Invoke call +dotnet_diagnostic.BCL0015.severity = none + +# BCL0020: Invalid SR.Format call +dotnet_diagnostic.BCL0020.severity = none + +# CA1000: Do not declare static members on generic types +dotnet_diagnostic.CA1000.severity = none + +# CA1001: Types that own disposable fields should be disposable +dotnet_diagnostic.CA1001.severity = none + +# CA1002: Do not expose generic lists +dotnet_diagnostic.CA1002.severity = none + +# CA1003: Use generic event handler instances +dotnet_diagnostic.CA1003.severity = none + +# CA1005: Avoid excessive parameters on generic types +dotnet_diagnostic.CA1005.severity = none + +# CA1008: Enums should have zero value +dotnet_diagnostic.CA1008.severity = none + +# CA1010: Generic interface should also be implemented +dotnet_diagnostic.CA1010.severity = none + +# CA1012: Abstract types should not have public constructors +dotnet_diagnostic.CA1012.severity = none + +# CA1014: Mark assemblies with CLSCompliant +dotnet_diagnostic.CA1014.severity = none + +# CA1016: Mark assemblies with assembly version +dotnet_diagnostic.CA1016.severity = none + +# CA1017: Mark assemblies with ComVisible +dotnet_diagnostic.CA1017.severity = none + +# CA1018: Mark attributes with AttributeUsageAttribute +dotnet_diagnostic.CA1018.severity = none + +# CA1019: Define accessors for attribute arguments +dotnet_diagnostic.CA1019.severity = none + +# CA1021: Avoid out parameters +dotnet_diagnostic.CA1021.severity = none + +# CA1024: Use properties where appropriate +dotnet_diagnostic.CA1024.severity = none + +# CA1027: Mark enums with FlagsAttribute +dotnet_diagnostic.CA1027.severity = none + +# CA1028: Enum Storage should be Int32 +dotnet_diagnostic.CA1028.severity = none + +# CA1030: Use events where appropriate +dotnet_diagnostic.CA1030.severity = none + +# CA1031: Do not catch general exception types +dotnet_diagnostic.CA1031.severity = none + +# CA1032: Implement standard exception constructors +dotnet_diagnostic.CA1032.severity = none + +# CA1033: Interface methods should be callable by child types +dotnet_diagnostic.CA1033.severity = none + +# CA1034: Nested types should not be visible +dotnet_diagnostic.CA1034.severity = none + +# CA1036: Override methods on comparable types +dotnet_diagnostic.CA1036.severity = none + +# CA1040: Avoid empty interfaces +dotnet_diagnostic.CA1040.severity = none + +# CA1041: Provide ObsoleteAttribute message +dotnet_diagnostic.CA1041.severity = none + +# CA1043: Use Integral Or String Argument For Indexers +dotnet_diagnostic.CA1043.severity = none + +# CA1044: Properties should not be write only +dotnet_diagnostic.CA1044.severity = none + +# CA1045: Do not pass types by reference +dotnet_diagnostic.CA1045.severity = none + +# CA1046: Do not overload equality operator on reference types +dotnet_diagnostic.CA1046.severity = none + +# CA1047: Do not declare protected member in sealed type +dotnet_diagnostic.CA1047.severity = none + +# CA1050: Declare types in namespaces +dotnet_diagnostic.CA1050.severity = none + +# CA1051: Do not declare visible instance fields +dotnet_diagnostic.CA1051.severity = none + +# CA1052: Static holder types should be Static or NotInheritable +dotnet_diagnostic.CA1052.severity = none + +# CA1054: URI-like parameters should not be strings +dotnet_diagnostic.CA1054.severity = none + +# CA1055: URI-like return values should not be strings +dotnet_diagnostic.CA1055.severity = none + +# CA1056: URI-like properties should not be strings +dotnet_diagnostic.CA1056.severity = none + +# CA1058: Types should not extend certain base types +dotnet_diagnostic.CA1058.severity = none + +# CA1060: Move pinvokes to native methods class +dotnet_diagnostic.CA1060.severity = none + +# CA1061: Do not hide base class methods +dotnet_diagnostic.CA1061.severity = none + +# CA1062: Validate arguments of public methods +dotnet_diagnostic.CA1062.severity = none + +# CA1063: Implement IDisposable Correctly +dotnet_diagnostic.CA1063.severity = none + +# CA1064: Exceptions should be public +dotnet_diagnostic.CA1064.severity = none + +# CA1065: Do not raise exceptions in unexpected locations +dotnet_diagnostic.CA1065.severity = none + +# CA1066: Implement IEquatable when overriding Object.Equals +dotnet_diagnostic.CA1066.severity = none + +# CA1067: Override Object.Equals(object) when implementing IEquatable +dotnet_diagnostic.CA1067.severity = none + +# CA1068: CancellationToken parameters must come last +dotnet_diagnostic.CA1068.severity = none + +# CA1069: Enums values should not be duplicated +dotnet_diagnostic.CA1069.severity = none + +# CA1070: Do not declare event fields as virtual +dotnet_diagnostic.CA1070.severity = none + +# CA1200: Avoid using cref tags with a prefix +dotnet_diagnostic.CA1200.severity = none + +# CA1303: Do not pass literals as localized parameters +dotnet_diagnostic.CA1303.severity = none + +# CA1304: Specify CultureInfo +dotnet_diagnostic.CA1304.severity = none + +# CA1305: Specify IFormatProvider +dotnet_diagnostic.CA1305.severity = none + +# CA1307: Specify StringComparison for clarity +dotnet_diagnostic.CA1307.severity = none + +# CA1308: Normalize strings to uppercase +dotnet_diagnostic.CA1308.severity = none + +# CA1309: Use ordinal string comparison +dotnet_diagnostic.CA1309.severity = none + +# CA1310: Specify StringComparison for correctness +dotnet_diagnostic.CA1310.severity = none + +# CA1401: P/Invokes should not be visible +dotnet_diagnostic.CA1401.severity = none + +# CA1416: Validate platform compatibility +dotnet_diagnostic.CA1416.severity = none + +# CA1417: Do not use 'OutAttribute' on string parameters for P/Invokes +dotnet_diagnostic.CA1417.severity = none + +# CA1418: Use valid platform string +dotnet_diagnostic.CA1418.severity = none + +# CA1419: Provide a parameterless constructor that is as visible as the containing type for concrete types derived from 'System.Runtime.InteropServices.SafeHandle' +dotnet_diagnostic.CA1419.severity = none + +# CA1501: Avoid excessive inheritance +dotnet_diagnostic.CA1501.severity = none + +# CA1502: Avoid excessive complexity +dotnet_diagnostic.CA1502.severity = none + +# CA1505: Avoid unmaintainable code +dotnet_diagnostic.CA1505.severity = none + +# CA1506: Avoid excessive class coupling +dotnet_diagnostic.CA1506.severity = none + +# CA1507: Use nameof to express symbol names +dotnet_diagnostic.CA1507.severity = none + +# CA1508: Avoid dead conditional code +dotnet_diagnostic.CA1508.severity = none + +# CA1509: Invalid entry in code metrics rule specification file +dotnet_diagnostic.CA1509.severity = none + +# CA1700: Do not name enum values 'Reserved' +dotnet_diagnostic.CA1700.severity = none + +# CA1707: Identifiers should not contain underscores +dotnet_diagnostic.CA1707.severity = none + +# CA1708: Identifiers should differ by more than case +dotnet_diagnostic.CA1708.severity = none + +# CA1710: Identifiers should have correct suffix +dotnet_diagnostic.CA1710.severity = none + +# CA1711: Identifiers should not have incorrect suffix +dotnet_diagnostic.CA1711.severity = none + +# CA1712: Do not prefix enum values with type name +dotnet_diagnostic.CA1712.severity = none + +# CA1713: Events should not have 'Before' or 'After' prefix +dotnet_diagnostic.CA1713.severity = none + +# CA1715: Identifiers should have correct prefix +dotnet_diagnostic.CA1715.severity = none + +# CA1716: Identifiers should not match keywords +dotnet_diagnostic.CA1716.severity = none + +# CA1720: Identifier contains type name +dotnet_diagnostic.CA1720.severity = none + +# CA1721: Property names should not match get methods +dotnet_diagnostic.CA1721.severity = none + +# CA1724: Type names should not match namespaces +dotnet_diagnostic.CA1724.severity = none + +# CA1725: Parameter names should match base declaration +dotnet_diagnostic.CA1725.severity = none + +# CA1727: Use PascalCase for named placeholders +dotnet_diagnostic.CA1727.severity = none + +# CA1802: Use literals where appropriate +dotnet_diagnostic.CA1802.severity = none + +# CA1805: Do not initialize unnecessarily +dotnet_diagnostic.CA1805.severity = none + +# CA1806: Do not ignore method results +dotnet_diagnostic.CA1806.severity = none + +# CA1810: Initialize reference type static fields inline +dotnet_diagnostic.CA1810.severity = none + +# CA1812: Avoid uninstantiated internal classes +dotnet_diagnostic.CA1812.severity = none + +# CA1813: Avoid unsealed attributes +dotnet_diagnostic.CA1813.severity = none + +# CA1814: Prefer jagged arrays over multidimensional +dotnet_diagnostic.CA1814.severity = none + +# CA1815: Override equals and operator equals on value types +dotnet_diagnostic.CA1815.severity = none + +# CA1816: Dispose methods should call SuppressFinalize +dotnet_diagnostic.CA1816.severity = none + +# CA1819: Properties should not return arrays +dotnet_diagnostic.CA1819.severity = none + +# CA1820: Test for empty strings using string length +dotnet_diagnostic.CA1820.severity = none + +# CA1821: Remove empty Finalizers +dotnet_diagnostic.CA1821.severity = none + +# CA1822: Mark members as static +dotnet_diagnostic.CA1822.severity = none + +# CA1823: Avoid unused private fields +dotnet_diagnostic.CA1823.severity = none + +# CA1824: Mark assemblies with NeutralResourcesLanguageAttribute +dotnet_diagnostic.CA1824.severity = none + +# CA1825: Avoid zero-length array allocations. +dotnet_diagnostic.CA1825.severity = none + +# CA1826: Do not use Enumerable methods on indexable collections +dotnet_diagnostic.CA1826.severity = none + +# CA1827: Do not use Count() or LongCount() when Any() can be used +dotnet_diagnostic.CA1827.severity = none + +# CA1828: Do not use CountAsync() or LongCountAsync() when AnyAsync() can be used +dotnet_diagnostic.CA1828.severity = none + +# CA1829: Use Length/Count property instead of Count() when available +dotnet_diagnostic.CA1829.severity = none + +# CA1830: Prefer strongly-typed Append and Insert method overloads on StringBuilder +dotnet_diagnostic.CA1830.severity = none + +# CA1831: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1831.severity = none + +# CA1832: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1832.severity = none + +# CA1833: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1833.severity = none + +# CA1834: Consider using 'StringBuilder.Append(char)' when applicable +dotnet_diagnostic.CA1834.severity = none + +# CA1835: Prefer the 'Memory'-based overloads for 'ReadAsync' and 'WriteAsync' +dotnet_diagnostic.CA1835.severity = none + +# CA1836: Prefer IsEmpty over Count +dotnet_diagnostic.CA1836.severity = none + +# CA1837: Use 'Environment.ProcessId' +dotnet_diagnostic.CA1837.severity = none + +# CA1838: Avoid 'StringBuilder' parameters for P/Invokes +dotnet_diagnostic.CA1838.severity = none + +# CA1839: Use 'Environment.ProcessPath' +dotnet_diagnostic.CA1839.severity = none + +# CA1840: Use 'Environment.CurrentManagedThreadId' +dotnet_diagnostic.CA1840.severity = none + +# CA1841: Prefer Dictionary.Contains methods +dotnet_diagnostic.CA1841.severity = none + +# CA1842: Do not use 'WhenAll' with a single task +dotnet_diagnostic.CA1842.severity = none + +# CA1843: Do not use 'WaitAll' with a single task +dotnet_diagnostic.CA1843.severity = none + +# CA1844: Provide memory-based overrides of async methods when subclassing 'Stream' +dotnet_diagnostic.CA1844.severity = none + +# CA1845: Use span-based 'string.Concat' +dotnet_diagnostic.CA1845.severity = none + +# CA1846: Prefer 'AsSpan' over 'Substring' +dotnet_diagnostic.CA1846.severity = none + +# CA1847: Use char literal for a single character lookup +dotnet_diagnostic.CA1847.severity = none + +# CA1848: Use the LoggerMessage delegates +dotnet_diagnostic.CA1848.severity = none + +# CA1849: Call async methods when in an async method +dotnet_diagnostic.CA1849.severity = none + +# CA1850: Prefer static 'HashData' method over 'ComputeHash' +dotnet_diagnostic.CA1850.severity = none + +# CA2000: Dispose objects before losing scope +dotnet_diagnostic.CA2000.severity = none + +# CA2002: Do not lock on objects with weak identity +dotnet_diagnostic.CA2002.severity = none + +# CA2007: Consider calling ConfigureAwait on the awaited task +dotnet_diagnostic.CA2007.severity = none + +# CA2008: Do not create tasks without passing a TaskScheduler +dotnet_diagnostic.CA2008.severity = none + +# CA2009: Do not call ToImmutableCollection on an ImmutableCollection value +dotnet_diagnostic.CA2009.severity = none + +# CA2011: Avoid infinite recursion +dotnet_diagnostic.CA2011.severity = none + +# CA2012: Use ValueTasks correctly +dotnet_diagnostic.CA2012.severity = none + +# CA2013: Do not use ReferenceEquals with value types +dotnet_diagnostic.CA2013.severity = none + +# CA2014: Do not use stackalloc in loops. +dotnet_diagnostic.CA2014.severity = none + +# CA2015: Do not define finalizers for types derived from MemoryManager +dotnet_diagnostic.CA2015.severity = none + +# CA2016: Forward the 'CancellationToken' parameter to methods +dotnet_diagnostic.CA2016.severity = none + +# CA2017: Parameter count mismatch +dotnet_diagnostic.CA2017.severity = warning + +# CA2018: 'Buffer.BlockCopy' expects the number of bytes to be copied for the 'count' argument +dotnet_diagnostic.CA2018.severity = warning + +# CA2100: Review SQL queries for security vulnerabilities +dotnet_diagnostic.CA2100.severity = none + +# CA2101: Specify marshaling for P/Invoke string arguments +dotnet_diagnostic.CA2101.severity = none + +# CA2109: Review visible event handlers +dotnet_diagnostic.CA2109.severity = none + +# CA2119: Seal methods that satisfy private interfaces +dotnet_diagnostic.CA2119.severity = none + +# CA2153: Do Not Catch Corrupted State Exceptions +dotnet_diagnostic.CA2153.severity = none + +# CA2200: Rethrow to preserve stack details +dotnet_diagnostic.CA2200.severity = none + +# CA2201: Do not raise reserved exception types +dotnet_diagnostic.CA2201.severity = none + +# CA2207: Initialize value type static fields inline +dotnet_diagnostic.CA2207.severity = none + +# CA2208: Instantiate argument exceptions correctly +dotnet_diagnostic.CA2208.severity = none + +# CA2211: Non-constant fields should not be visible +dotnet_diagnostic.CA2211.severity = none + +# CA2213: Disposable fields should be disposed +dotnet_diagnostic.CA2213.severity = none + +# CA2214: Do not call overridable methods in constructors +dotnet_diagnostic.CA2214.severity = none + +# CA2215: Dispose methods should call base class dispose +dotnet_diagnostic.CA2215.severity = none + +# CA2216: Disposable types should declare finalizer +dotnet_diagnostic.CA2216.severity = none + +# CA2217: Do not mark enums with FlagsAttribute +dotnet_diagnostic.CA2217.severity = none + +# CA2218: Override GetHashCode on overriding Equals +dotnet_diagnostic.CA2218.severity = none + +# CA2219: Do not raise exceptions in finally clauses +dotnet_diagnostic.CA2219.severity = none + +# CA2224: Override Equals on overloading operator equals +dotnet_diagnostic.CA2224.severity = none + +# CA2225: Operator overloads have named alternates +dotnet_diagnostic.CA2225.severity = none + +# CA2226: Operators should have symmetrical overloads +dotnet_diagnostic.CA2226.severity = none + +# CA2227: Collection properties should be read only +dotnet_diagnostic.CA2227.severity = none + +# CA2229: Implement serialization constructors +dotnet_diagnostic.CA2229.severity = none + +# CA2231: Overload operator equals on overriding value type Equals +dotnet_diagnostic.CA2231.severity = none + +# CA2234: Pass system uri objects instead of strings +dotnet_diagnostic.CA2234.severity = none + +# CA2235: Mark all non-serializable fields +dotnet_diagnostic.CA2235.severity = none + +# CA2237: Mark ISerializable types with serializable +dotnet_diagnostic.CA2237.severity = none + +# CA2241: Provide correct arguments to formatting methods +dotnet_diagnostic.CA2241.severity = none + +# CA2242: Test for NaN correctly +dotnet_diagnostic.CA2242.severity = none + +# CA2243: Attribute string literals should parse correctly +dotnet_diagnostic.CA2243.severity = none + +# CA2244: Do not duplicate indexed element initializations +dotnet_diagnostic.CA2244.severity = none + +# CA2245: Do not assign a property to itself +dotnet_diagnostic.CA2245.severity = none + +# CA2246: Assigning symbol and its member in the same statement +dotnet_diagnostic.CA2246.severity = none + +# CA2247: Argument passed to TaskCompletionSource constructor should be TaskCreationOptions enum instead of TaskContinuationOptions enum +dotnet_diagnostic.CA2247.severity = none + +# CA2248: Provide correct 'enum' argument to 'Enum.HasFlag' +dotnet_diagnostic.CA2248.severity = none + +# CA2249: Consider using 'string.Contains' instead of 'string.IndexOf' +dotnet_diagnostic.CA2249.severity = none + +# CA2250: Use 'ThrowIfCancellationRequested' +dotnet_diagnostic.CA2250.severity = none + +# CA2251: Use 'string.Equals' +dotnet_diagnostic.CA2251.severity = none + +# CA2252: This API requires opting into preview features +dotnet_diagnostic.CA2252.severity = error + +# CA2253: Named placeholders should not be numeric values +dotnet_diagnostic.CA2253.severity = none + +# CA2254: Template should be a static expression +dotnet_diagnostic.CA2254.severity = none + +# CA2255: The 'ModuleInitializer' attribute should not be used in libraries +dotnet_diagnostic.CA2255.severity = warning + +# CA2256: All members declared in parent interfaces must have an implementation in a DynamicInterfaceCastableImplementation-attributed interface +dotnet_diagnostic.CA2256.severity = warning + +# CA2257: Members defined on an interface with the 'DynamicInterfaceCastableImplementationAttribute' should be 'static' +dotnet_diagnostic.CA2257.severity = warning + +# CA2258: Providing a 'DynamicInterfaceCastableImplementation' interface in Visual Basic is unsupported +dotnet_diagnostic.CA2258.severity = warning + +# CA2300: Do not use insecure deserializer BinaryFormatter +dotnet_diagnostic.CA2300.severity = none + +# CA2301: Do not call BinaryFormatter.Deserialize without first setting BinaryFormatter.Binder +dotnet_diagnostic.CA2301.severity = none + +# CA2302: Ensure BinaryFormatter.Binder is set before calling BinaryFormatter.Deserialize +dotnet_diagnostic.CA2302.severity = none + +# CA2305: Do not use insecure deserializer LosFormatter +dotnet_diagnostic.CA2305.severity = none + +# CA2310: Do not use insecure deserializer NetDataContractSerializer +dotnet_diagnostic.CA2310.severity = none + +# CA2311: Do not deserialize without first setting NetDataContractSerializer.Binder +dotnet_diagnostic.CA2311.severity = none + +# CA2312: Ensure NetDataContractSerializer.Binder is set before deserializing +dotnet_diagnostic.CA2312.severity = none + +# CA2315: Do not use insecure deserializer ObjectStateFormatter +dotnet_diagnostic.CA2315.severity = none + +# CA2321: Do not deserialize with JavaScriptSerializer using a SimpleTypeResolver +dotnet_diagnostic.CA2321.severity = none + +# CA2322: Ensure JavaScriptSerializer is not initialized with SimpleTypeResolver before deserializing +dotnet_diagnostic.CA2322.severity = none + +# CA2326: Do not use TypeNameHandling values other than None +dotnet_diagnostic.CA2326.severity = none + +# CA2327: Do not use insecure JsonSerializerSettings +dotnet_diagnostic.CA2327.severity = none + +# CA2328: Ensure that JsonSerializerSettings are secure +dotnet_diagnostic.CA2328.severity = none + +# CA2329: Do not deserialize with JsonSerializer using an insecure configuration +dotnet_diagnostic.CA2329.severity = none + +# CA2330: Ensure that JsonSerializer has a secure configuration when deserializing +dotnet_diagnostic.CA2330.severity = none + +# CA2350: Do not use DataTable.ReadXml() with untrusted data +dotnet_diagnostic.CA2350.severity = none + +# CA2351: Do not use DataSet.ReadXml() with untrusted data +dotnet_diagnostic.CA2351.severity = none + +# CA2352: Unsafe DataSet or DataTable in serializable type can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2352.severity = none + +# CA2353: Unsafe DataSet or DataTable in serializable type +dotnet_diagnostic.CA2353.severity = none + +# CA2354: Unsafe DataSet or DataTable in deserialized object graph can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2354.severity = none + +# CA2355: Unsafe DataSet or DataTable type found in deserializable object graph +dotnet_diagnostic.CA2355.severity = none + +# CA2356: Unsafe DataSet or DataTable type in web deserializable object graph +dotnet_diagnostic.CA2356.severity = none + +# CA2361: Ensure auto-generated class containing DataSet.ReadXml() is not used with untrusted data +dotnet_diagnostic.CA2361.severity = none + +# CA2362: Unsafe DataSet or DataTable in auto-generated serializable type can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2362.severity = none + +# CA3001: Review code for SQL injection vulnerabilities +dotnet_diagnostic.CA3001.severity = none + +# CA3002: Review code for XSS vulnerabilities +dotnet_diagnostic.CA3002.severity = none + +# CA3003: Review code for file path injection vulnerabilities +dotnet_diagnostic.CA3003.severity = none + +# CA3004: Review code for information disclosure vulnerabilities +dotnet_diagnostic.CA3004.severity = none + +# CA3005: Review code for LDAP injection vulnerabilities +dotnet_diagnostic.CA3005.severity = none + +# CA3006: Review code for process command injection vulnerabilities +dotnet_diagnostic.CA3006.severity = none + +# CA3007: Review code for open redirect vulnerabilities +dotnet_diagnostic.CA3007.severity = none + +# CA3008: Review code for XPath injection vulnerabilities +dotnet_diagnostic.CA3008.severity = none + +# CA3009: Review code for XML injection vulnerabilities +dotnet_diagnostic.CA3009.severity = none + +# CA3010: Review code for XAML injection vulnerabilities +dotnet_diagnostic.CA3010.severity = none + +# CA3011: Review code for DLL injection vulnerabilities +dotnet_diagnostic.CA3011.severity = none + +# CA3012: Review code for regex injection vulnerabilities +dotnet_diagnostic.CA3012.severity = none + +# CA3061: Do Not Add Schema By URL +dotnet_diagnostic.CA3061.severity = none + +# CA3075: Insecure DTD processing in XML +dotnet_diagnostic.CA3075.severity = none + +# CA3076: Insecure XSLT script processing. +dotnet_diagnostic.CA3076.severity = none + +# CA3077: Insecure Processing in API Design, XmlDocument and XmlTextReader +dotnet_diagnostic.CA3077.severity = none + +# CA3147: Mark Verb Handlers With Validate Antiforgery Token +dotnet_diagnostic.CA3147.severity = none + +# CA5350: Do Not Use Weak Cryptographic Algorithms +dotnet_diagnostic.CA5350.severity = none + +# CA5351: Do Not Use Broken Cryptographic Algorithms +dotnet_diagnostic.CA5351.severity = none + +# CA5358: Review cipher mode usage with cryptography experts +dotnet_diagnostic.CA5358.severity = none + +# CA5359: Do Not Disable Certificate Validation +dotnet_diagnostic.CA5359.severity = none + +# CA5360: Do Not Call Dangerous Methods In Deserialization +dotnet_diagnostic.CA5360.severity = none + +# CA5361: Do Not Disable SChannel Use of Strong Crypto +dotnet_diagnostic.CA5361.severity = none + +# CA5362: Potential reference cycle in deserialized object graph +dotnet_diagnostic.CA5362.severity = none + +# CA5363: Do Not Disable Request Validation +dotnet_diagnostic.CA5363.severity = none + +# CA5364: Do Not Use Deprecated Security Protocols +dotnet_diagnostic.CA5364.severity = none + +# CA5365: Do Not Disable HTTP Header Checking +dotnet_diagnostic.CA5365.severity = none + +# CA5366: Use XmlReader for 'DataSet.ReadXml()' +dotnet_diagnostic.CA5366.severity = none + +# CA5367: Do Not Serialize Types With Pointer Fields +dotnet_diagnostic.CA5367.severity = none + +# CA5368: Set ViewStateUserKey For Classes Derived From Page +dotnet_diagnostic.CA5368.severity = none + +# CA5369: Use XmlReader for 'XmlSerializer.Deserialize()' +dotnet_diagnostic.CA5369.severity = none + +# CA5370: Use XmlReader for XmlValidatingReader constructor +dotnet_diagnostic.CA5370.severity = none + +# CA5371: Use XmlReader for 'XmlSchema.Read()' +dotnet_diagnostic.CA5371.severity = none + +# CA5372: Use XmlReader for XPathDocument constructor +dotnet_diagnostic.CA5372.severity = none + +# CA5373: Do not use obsolete key derivation function +dotnet_diagnostic.CA5373.severity = none + +# CA5374: Do Not Use XslTransform +dotnet_diagnostic.CA5374.severity = none + +# CA5375: Do Not Use Account Shared Access Signature +dotnet_diagnostic.CA5375.severity = none + +# CA5376: Use SharedAccessProtocol HttpsOnly +dotnet_diagnostic.CA5376.severity = none + +# CA5377: Use Container Level Access Policy +dotnet_diagnostic.CA5377.severity = none + +# CA5378: Do not disable ServicePointManagerSecurityProtocols +dotnet_diagnostic.CA5378.severity = none + +# CA5379: Ensure Key Derivation Function algorithm is sufficiently strong +dotnet_diagnostic.CA5379.severity = none + +# CA5380: Do Not Add Certificates To Root Store +dotnet_diagnostic.CA5380.severity = none + +# CA5381: Ensure Certificates Are Not Added To Root Store +dotnet_diagnostic.CA5381.severity = none + +# CA5382: Use Secure Cookies In ASP.Net Core +dotnet_diagnostic.CA5382.severity = none + +# CA5383: Ensure Use Secure Cookies In ASP.NET Core +dotnet_diagnostic.CA5383.severity = none + +# CA5384: Do Not Use Digital Signature Algorithm (DSA) +dotnet_diagnostic.CA5384.severity = none + +# CA5385: Use RivestīŋŊShamirīŋŊAdleman (RSA) Algorithm With Sufficient Key Size +dotnet_diagnostic.CA5385.severity = none + +# CA5386: Avoid hardcoding SecurityProtocolType value +dotnet_diagnostic.CA5386.severity = none + +# CA5387: Do Not Use Weak Key Derivation Function With Insufficient Iteration Count +dotnet_diagnostic.CA5387.severity = none + +# CA5388: Ensure Sufficient Iteration Count When Using Weak Key Derivation Function +dotnet_diagnostic.CA5388.severity = none + +# CA5389: Do Not Add Archive Item's Path To The Target File System Path +dotnet_diagnostic.CA5389.severity = none + +# CA5390: Do not hard-code encryption key +dotnet_diagnostic.CA5390.severity = none + +# CA5391: Use antiforgery tokens in ASP.NET Core MVC controllers +dotnet_diagnostic.CA5391.severity = none + +# CA5392: Use DefaultDllImportSearchPaths attribute for P/Invokes +dotnet_diagnostic.CA5392.severity = none + +# CA5393: Do not use unsafe DllImportSearchPath value +dotnet_diagnostic.CA5393.severity = none + +# CA5394: Do not use insecure randomness +dotnet_diagnostic.CA5394.severity = none + +# CA5395: Miss HttpVerb attribute for action methods +dotnet_diagnostic.CA5395.severity = none + +# CA5396: Set HttpOnly to true for HttpCookie +dotnet_diagnostic.CA5396.severity = none + +# CA5397: Do not use deprecated SslProtocols values +dotnet_diagnostic.CA5397.severity = none + +# CA5398: Avoid hardcoded SslProtocols values +dotnet_diagnostic.CA5398.severity = none + +# CA5399: HttpClients should enable certificate revocation list checks +dotnet_diagnostic.CA5399.severity = none + +# CA5400: Ensure HttpClient certificate revocation list check is not disabled +dotnet_diagnostic.CA5400.severity = none + +# CA5401: Do not use CreateEncryptor with non-default IV +dotnet_diagnostic.CA5401.severity = none + +# CA5402: Use CreateEncryptor with the default IV +dotnet_diagnostic.CA5402.severity = none + +# CA5403: Do not hard-code certificate +dotnet_diagnostic.CA5403.severity = none + +# CA5404: Do not disable token validation checks +dotnet_diagnostic.CA5404.severity = none + +# CA5405: Do not always skip token validation in delegates +dotnet_diagnostic.CA5405.severity = none + +# IL3000: Avoid using accessing Assembly file path when publishing as a single-file +dotnet_diagnostic.IL3000.severity = none + +# IL3001: Avoid using accessing Assembly file path when publishing as a single-file +dotnet_diagnostic.IL3001.severity = none + +# IL3002: Using member with RequiresAssemblyFilesAttribute can break functionality when embedded in a single-file app +dotnet_diagnostic.IL3002.severity = none + +# SA0001: XML comments +dotnet_diagnostic.SA0001.severity = none + +# SA1000: Spacing around keywords +dotnet_diagnostic.SA1000.severity = none + +# SA1001: Commas should not be preceded by whitespace +dotnet_diagnostic.SA1001.severity = none + +# SA1002: Semicolons should not be preceded by a space +dotnet_diagnostic.SA1002.severity = none + +# SA1003: Operator should not appear at the end of a line +dotnet_diagnostic.SA1003.severity = none + +# SA1004: Documentation line should begin with a space +dotnet_diagnostic.SA1004.severity = none + +# SA1005: Single line comment should begin with a space +dotnet_diagnostic.SA1005.severity = none + +# SA1008: Opening parenthesis should not be preceded by a space +dotnet_diagnostic.SA1008.severity = none + +# SA1009: Closing parenthesis should not be followed by a space +dotnet_diagnostic.SA1009.severity = none + +# SA1010: Opening square brackets should not be preceded by a space +dotnet_diagnostic.SA1010.severity = none + +# SA1011: Closing square bracket should be followed by a space +dotnet_diagnostic.SA1011.severity = none + +# SA1012: Opening brace should be followed by a space +dotnet_diagnostic.SA1012.severity = none + +# SA1013: Closing brace should be preceded by a space +dotnet_diagnostic.SA1013.severity = none + +# SA1014: Opening generic brackets should not be preceded by a space +dotnet_diagnostic.SA1014.severity = none + +# SA1015: Closing generic bracket should not be followed by a space +dotnet_diagnostic.SA1015.severity = none + +# SA1018: Nullable type symbol should not be preceded by a space +dotnet_diagnostic.SA1018.severity = none + +# SA1020: Increment symbol should not be preceded by a space +dotnet_diagnostic.SA1020.severity = none + +# SA1021: Negative sign should be preceded by a space +dotnet_diagnostic.SA1021.severity = none + +# SA1023: Dereference symbol '*' should not be preceded by a space." +dotnet_diagnostic.SA1023.severity = none + +# SA1024: Colon should be followed by a space +dotnet_diagnostic.SA1024.severity = none + +# SA1025: Code should not contain multiple whitespace characters in a row +dotnet_diagnostic.SA1025.severity = none + +# SA1026: Keyword followed by span or blank line +dotnet_diagnostic.SA1026.severity = none + +# SA1027: Tabs and spaces should be used correctly +dotnet_diagnostic.SA1027.severity = none + +# SA1028: Code should not contain trailing whitespace +dotnet_diagnostic.SA1028.severity = none + +# SA1100: Do not prefix calls with base unless local implementation exists +dotnet_diagnostic.SA1100.severity = none + +# SA1101: Prefix local calls with this +dotnet_diagnostic.SA1101.severity = none + +# SA1102: Query clause should follow previous clause +dotnet_diagnostic.SA1102.severity = none + +# SA1105: Query clauses spanning multiple lines should begin on own line +dotnet_diagnostic.SA1105.severity = none + +# SA1106: Code should not contain empty statements +dotnet_diagnostic.SA1106.severity = none + +# SA1107: Code should not contain multiple statements on one line +dotnet_diagnostic.SA1107.severity = none + +# SA1108: Block statements should not contain embedded comments +dotnet_diagnostic.SA1108.severity = none + +# SA1110: Opening parenthesis or bracket should be on declaration line +dotnet_diagnostic.SA1110.severity = none + +# SA1111: Closing parenthesis should be on line of last parameter +dotnet_diagnostic.SA1111.severity = none + +# SA1113: Comma should be on the same line as previous parameter +dotnet_diagnostic.SA1113.severity = none + +# SA1114: Parameter list should follow declaration +dotnet_diagnostic.SA1114.severity = none + +# SA1115: Parameter should begin on the line after the previous parameter +dotnet_diagnostic.SA1115.severity = none + +# SA1116: Split parameters should start on line after declaration +dotnet_diagnostic.SA1116.severity = none + +# SA1117: Parameters should be on same line or separate lines +dotnet_diagnostic.SA1117.severity = none + +# SA1118: Parameter should not span multiple lines +dotnet_diagnostic.SA1118.severity = none + +# SA1119: Statement should not use unnecessary parenthesis +dotnet_diagnostic.SA1119.severity = none + +# SA1120: Comments should contain text +dotnet_diagnostic.SA1120.severity = none + +# SA1121: Use built-in type alias +dotnet_diagnostic.SA1121.severity = none + +# SA1122: Use string.Empty for empty strings +dotnet_diagnostic.SA1122.severity = none + +# SA1123: Region should not be located within a code element +dotnet_diagnostic.SA1123.severity = none + +# SA1124: Do not use regions +dotnet_diagnostic.SA1124.severity = none + +# SA1125: Use shorthand for nullable types +dotnet_diagnostic.SA1125.severity = none + +# SA1127: Generic type constraints should be on their own line +dotnet_diagnostic.SA1127.severity = none + +# SA1128: Put constructor initializers on their own line +dotnet_diagnostic.SA1128.severity = none + +# SA1129: Do not use default value type constructor +dotnet_diagnostic.SA1129.severity = none + +# SA1130: Use lambda syntax +dotnet_diagnostic.SA1130.severity = none + +# SA1131: Constant values should appear on the right-hand side of comparisons +dotnet_diagnostic.SA1131.severity = none + +# SA1132: Do not combine fields +dotnet_diagnostic.SA1132.severity = none + +# SA1133: Do not combine attributes +dotnet_diagnostic.SA1133.severity = none + +# SA1134: Each attribute should be placed on its own line of code +dotnet_diagnostic.SA1134.severity = none + +# SA1135: Using directive should be qualified +dotnet_diagnostic.SA1135.severity = none + +# SA1136: Enum values should be on separate lines +dotnet_diagnostic.SA1136.severity = none + +# SA1137: Elements should have the same indentation +dotnet_diagnostic.SA1137.severity = none + +# SA1139: Use literal suffix notation instead of casting +dotnet_diagnostic.SA1139.severity = none + +# SA1141: Use tuple syntax +dotnet_diagnostic.SA1141.severity = none + +# SA1142: Refer to tuple elements by name +dotnet_diagnostic.SA1142.severity = none + +# SA1200: Using directive should appear within a namespace declaration +dotnet_diagnostic.SA1200.severity = none + +# SA1201: Elements should appear in the correct order +dotnet_diagnostic.SA1201.severity = none + +# SA1202: Elements should be ordered by access +dotnet_diagnostic.SA1202.severity = none + +# SA1203: Constants should appear before fields +dotnet_diagnostic.SA1203.severity = none + +# SA1204: Static elements should appear before instance elements +dotnet_diagnostic.SA1204.severity = none + +# SA1205: Partial elements should declare an access modifier +dotnet_diagnostic.SA1205.severity = none + +# SA1206: Keyword ordering +dotnet_diagnostic.SA1206.severity = none + +# SA1208: Using directive ordering +dotnet_diagnostic.SA1208.severity = none + +# SA1209: Using alias directives should be placed after all using namespace directives +dotnet_diagnostic.SA1209.severity = none + +# SA1210: Using directives should be ordered alphabetically by the namespaces +dotnet_diagnostic.SA1210.severity = none + +# SA1211: Using alias directive ordering +dotnet_diagnostic.SA1211.severity = none + +# SA1212: A get accessor appears after a set accessor within a property or indexer +dotnet_diagnostic.SA1212.severity = none + +# SA1214: Readonly fields should appear before non-readonly fields +dotnet_diagnostic.SA1214.severity = none + +# SA1216: Using static directives should be placed at the correct location +dotnet_diagnostic.SA1216.severity = none + +# SA1300: Element should begin with an uppercase letter +dotnet_diagnostic.SA1300.severity = none + +# SA1302: Interface names should begin with I +dotnet_diagnostic.SA1302.severity = none + +# SA1303: Const field names should begin with upper-case letter +dotnet_diagnostic.SA1303.severity = none + +# SA1304: Non-private readonly fields should begin with upper-case letter +dotnet_diagnostic.SA1304.severity = none + +# SA1306: Field should begin with lower-case letter +dotnet_diagnostic.SA1306.severity = none + +# SA1307: Field should begin with upper-case letter +dotnet_diagnostic.SA1307.severity = none + +# SA1308: Field should not begin with the prefix 's_' +dotnet_diagnostic.SA1308.severity = none + +# SA1309: Field names should not begin with underscore +dotnet_diagnostic.SA1309.severity = none + +# SA1310: Field should not contain an underscore +dotnet_diagnostic.SA1310.severity = none + +# SA1311: Static readonly fields should begin with upper-case letter +dotnet_diagnostic.SA1311.severity = none + +# SA1312: Variable should begin with lower-case letter +dotnet_diagnostic.SA1312.severity = none + +# SA1313: Parameter should begin with lower-case letter +dotnet_diagnostic.SA1313.severity = none + +# SA1314: Type parameter names should begin with T +dotnet_diagnostic.SA1314.severity = none + +# SA1316: Tuple element names should use correct casing +dotnet_diagnostic.SA1316.severity = none + +# SA1400: Member should declare an access modifier +dotnet_diagnostic.SA1400.severity = none + +# SA1401: Fields should be private +dotnet_diagnostic.SA1401.severity = none + +# SA1402: File may only contain a single type +dotnet_diagnostic.SA1402.severity = none + +# SA1403: File may only contain a single namespace +dotnet_diagnostic.SA1403.severity = none + +# SA1404: Code analysis suppression should have justification +dotnet_diagnostic.SA1404.severity = none + +# SA1405: Debug.Assert should provide message text +dotnet_diagnostic.SA1405.severity = none + +# SA1407: Arithmetic expressions should declare precedence +dotnet_diagnostic.SA1407.severity = none + +# SA1408: Conditional expressions should declare precedence +dotnet_diagnostic.SA1408.severity = none + +# SA1410: Remove delegate parentheses when possible +dotnet_diagnostic.SA1410.severity = none + +# SA1411: Attribute constructor shouldn't use unnecessary parenthesis +dotnet_diagnostic.SA1411.severity = none + +# SA1413: Use trailing comma in multi-line initializers +dotnet_diagnostic.SA1413.severity = none + +# SA1414: Tuple types in signatures should have element names +dotnet_diagnostic.SA1414.severity = none + +# SA1500: Braces for multi-line statements should not share line +dotnet_diagnostic.SA1500.severity = none + +# SA1501: Statement should not be on a single line +dotnet_diagnostic.SA1501.severity = none + +# SA1502: Element should not be on a single line +dotnet_diagnostic.SA1502.severity = none + +# SA1503: Braces should not be omitted +dotnet_diagnostic.SA1503.severity = none + +# SA1504: All accessors should be single-line or multi-line +dotnet_diagnostic.SA1504.severity = none + +# SA1505: An opening brace should not be followed by a blank line +dotnet_diagnostic.SA1505.severity = none + +# SA1506: Element documentation headers should not be followed by blank line +dotnet_diagnostic.SA1506.severity = none + +# SA1507: Code should not contain multiple blank lines in a row +dotnet_diagnostic.SA1507.severity = none + +# SA1508: A closing brace should not be preceded by a blank line +dotnet_diagnostic.SA1508.severity = none + +# SA1509: Opening braces should not be preceded by blank line +dotnet_diagnostic.SA1509.severity = none + +# SA1510: 'else' statement should not be preceded by a blank line +dotnet_diagnostic.SA1510.severity = none + +# SA1512: Single-line comments should not be followed by blank line +dotnet_diagnostic.SA1512.severity = none + +# SA1513: Closing brace should be followed by blank line +dotnet_diagnostic.SA1513.severity = none + +# SA1514: Element documentation header should be preceded by blank line +dotnet_diagnostic.SA1514.severity = none + +# SA1515: Single-line comment should be preceded by blank line +dotnet_diagnostic.SA1515.severity = none + +# SA1516: Elements should be separated by blank line +dotnet_diagnostic.SA1516.severity = none + +# SA1517: Code should not contain blank lines at start of file +dotnet_diagnostic.SA1517.severity = none + +# SA1518: Code should not contain blank lines at the end of the file +dotnet_diagnostic.SA1518.severity = none + +# SA1519: Braces should not be omitted from multi-line child statement +dotnet_diagnostic.SA1519.severity = none + +# SA1520: Use braces consistently +dotnet_diagnostic.SA1520.severity = none + +# SA1600: Elements should be documented +dotnet_diagnostic.SA1600.severity = none + +# SA1601: Partial elements should be documented +dotnet_diagnostic.SA1601.severity = none + +# SA1602: Enumeration items should be documented +dotnet_diagnostic.SA1602.severity = none + +# SA1604: Element documentation should have summary +dotnet_diagnostic.SA1604.severity = none + +# SA1605: Partial element documentation should have summary +dotnet_diagnostic.SA1605.severity = none + +# SA1606: Element documentation should have summary text +dotnet_diagnostic.SA1606.severity = none + +# SA1608: Element documentation should not have default summary +dotnet_diagnostic.SA1608.severity = none + +# SA1610: Property documentation should have value text +dotnet_diagnostic.SA1610.severity = none + +# SA1611: The documentation for parameter 'message' is missing +dotnet_diagnostic.SA1611.severity = none + +# SA1612: The parameter documentation is at incorrect position +dotnet_diagnostic.SA1612.severity = none + +# SA1614: Element parameter documentation should have text +dotnet_diagnostic.SA1614.severity = none + +# SA1615: Element return value should be documented +dotnet_diagnostic.SA1615.severity = none + +# SA1616: Element return value documentation should have text +dotnet_diagnostic.SA1616.severity = none + +# SA1618: The documentation for type parameter is missing +dotnet_diagnostic.SA1618.severity = none + +# SA1619: The documentation for type parameter is missing +dotnet_diagnostic.SA1619.severity = none + +# SA1622: Generic type parameter documentation should have text +dotnet_diagnostic.SA1622.severity = none + +# SA1623: Property documentation text +dotnet_diagnostic.SA1623.severity = none + +# SA1624: Because the property only contains a visible get accessor, the documentation summary text should begin with 'Gets' +dotnet_diagnostic.SA1624.severity = none + +# SA1625: Element documentation should not be copied and pasted +dotnet_diagnostic.SA1625.severity = none + +# SA1626: Single-line comments should not use documentation style slashes +dotnet_diagnostic.SA1626.severity = none + +# SA1627: The documentation text within the \'exception\' tag should not be empty +dotnet_diagnostic.SA1627.severity = none + +# SA1629: Documentation text should end with a period +dotnet_diagnostic.SA1629.severity = none + +# SA1633: File should have header +dotnet_diagnostic.SA1633.severity = none + +# SA1642: Constructor summary documentation should begin with standard text +dotnet_diagnostic.SA1642.severity = none + +# SA1643: Destructor summary documentation should begin with standard text +dotnet_diagnostic.SA1643.severity = none + +# SA1649: File name should match first type name +dotnet_diagnostic.SA1649.severity = none + +# IDE0001: Simplify name +dotnet_diagnostic.IDE0001.severity = silent + +# IDE0002: Simplify member access +dotnet_diagnostic.IDE0002.severity = silent + +# IDE0003: Remove this or Me qualification +dotnet_diagnostic.IDE0003.severity = silent + +# IDE0004: Remove Unnecessary Cast +dotnet_diagnostic.IDE0004.severity = silent + +# IDE0005: Using directive is unnecessary. +dotnet_diagnostic.IDE0005.severity = silent + +# IDE0007: Use implicit type +dotnet_diagnostic.IDE0007.severity = silent + +# IDE0008: Use explicit type +dotnet_diagnostic.IDE0008.severity = silent + +# IDE0009: Add this or Me qualification +dotnet_diagnostic.IDE0009.severity = silent + +# IDE0010: Add missing cases +dotnet_diagnostic.IDE0010.severity = silent + +# IDE0011: Add braces +dotnet_diagnostic.IDE0011.severity = silent + +# IDE0016: Use 'throw' expression +dotnet_diagnostic.IDE0016.severity = silent + +# IDE0017: Simplify object initialization +dotnet_diagnostic.IDE0017.severity = silent + +# IDE0018: Inline variable declaration +dotnet_diagnostic.IDE0018.severity = silent + +# IDE0019: Use pattern matching to avoid as followed by a null check +dotnet_diagnostic.IDE0019.severity = silent + +# IDE0020: Use pattern matching to avoid is check followed by a cast (with variable) +dotnet_diagnostic.IDE0020.severity = silent + +# IDE0021: Use expression body for constructors +dotnet_diagnostic.IDE0021.severity = silent + +# IDE0022: Use expression body for methods +dotnet_diagnostic.IDE0022.severity = silent + +# IDE0023: Use expression body for operators +dotnet_diagnostic.IDE0023.severity = silent + +# IDE0024: Use expression body for operators +dotnet_diagnostic.IDE0024.severity = silent + +# IDE0025: Use expression body for properties +dotnet_diagnostic.IDE0025.severity = silent + +# IDE0026: Use expression body for indexers +dotnet_diagnostic.IDE0026.severity = silent + +# IDE0027: Use expression body for accessors +dotnet_diagnostic.IDE0027.severity = silent + +# IDE0028: Simplify collection initialization +dotnet_diagnostic.IDE0028.severity = silent + +# IDE0029: Use coalesce expression +dotnet_diagnostic.IDE0029.severity = silent + +# IDE0030: Use coalesce expression +dotnet_diagnostic.IDE0030.severity = silent + +# IDE0031: Use null propagation +dotnet_diagnostic.IDE0031.severity = silent + +# IDE0032: Use auto property +dotnet_diagnostic.IDE0032.severity = silent + +# IDE0033: Use explicitly provided tuple name +dotnet_diagnostic.IDE0033.severity = silent + +# IDE0034: Simplify 'default' expression +dotnet_diagnostic.IDE0034.severity = silent + +# IDE0035: Remove unreachable code +dotnet_diagnostic.IDE0035.severity = silent + +# IDE0036: Order modifiers +dotnet_diagnostic.IDE0036.severity = silent + +# IDE0037: Use inferred member name +dotnet_diagnostic.IDE0037.severity = silent + +# IDE0038: Use pattern matching to avoid is check followed by a cast (without variable) +dotnet_diagnostic.IDE0038.severity = silent + +# IDE0039: Use local function +dotnet_diagnostic.IDE0039.severity = silent + +# IDE0040: Add accessibility modifiers +dotnet_diagnostic.IDE0040.severity = silent + +# IDE0041: Use 'is null' check +dotnet_diagnostic.IDE0041.severity = silent + +# IDE0042: Deconstruct variable declaration +dotnet_diagnostic.IDE0042.severity = silent + +# IDE0043: Invalid format string +dotnet_diagnostic.IDE0043.severity = silent + +# IDE0044: Add readonly modifier +dotnet_diagnostic.IDE0044.severity = silent + +# IDE0045: Use conditional expression for assignment +dotnet_diagnostic.IDE0045.severity = silent + +# IDE0046: Use conditional expression for return +dotnet_diagnostic.IDE0046.severity = silent + +# IDE0047: Remove unnecessary parentheses +dotnet_diagnostic.IDE0047.severity = silent + +# IDE0048: Add parentheses for clarity +dotnet_diagnostic.IDE0048.severity = silent + +# IDE0049: Use language keywords instead of framework type names for type references +dotnet_diagnostic.IDE0049.severity = silent + +# IDE0050: Convert anonymous type to tuple +dotnet_diagnostic.IDE0050.severity = silent + +# IDE0051: Remove unused private members +dotnet_diagnostic.IDE0051.severity = silent + +# IDE0052: Remove unread private members +dotnet_diagnostic.IDE0052.severity = silent + +# IDE0053: Use expression body for lambdas +dotnet_diagnostic.IDE0053.severity = silent + +# IDE0054: Use compound assignment +dotnet_diagnostic.IDE0054.severity = silent + +# IDE0055: Fix formatting +dotnet_diagnostic.IDE0055.severity = silent + +# IDE0056: Use index operator +dotnet_diagnostic.IDE0056.severity = silent + +# IDE0057: Use range operator +dotnet_diagnostic.IDE0057.severity = silent + +# IDE0058: Expression value is never used +dotnet_diagnostic.IDE0058.severity = silent + +# IDE0059: Unnecessary assignment of a value +dotnet_diagnostic.IDE0059.severity = silent + +# IDE0060: Remove unused parameter +dotnet_diagnostic.IDE0060.severity = silent + +# IDE0061: Use expression body for local functions +dotnet_diagnostic.IDE0061.severity = silent + +# IDE0062: Make local function 'static' +dotnet_diagnostic.IDE0062.severity = silent + +# IDE0063: Use simple 'using' statement +dotnet_diagnostic.IDE0063.severity = silent + +# IDE0064: Make readonly fields writable +dotnet_diagnostic.IDE0064.severity = silent + +# IDE0065: Misplaced using directive +dotnet_diagnostic.IDE0065.severity = silent + +# IDE0066: Convert switch statement to expression +dotnet_diagnostic.IDE0066.severity = silent + +# IDE0070: Use 'System.HashCode' +dotnet_diagnostic.IDE0070.severity = silent + +# IDE0071: Simplify interpolation +dotnet_diagnostic.IDE0071.severity = silent + +# IDE0072: Add missing cases +dotnet_diagnostic.IDE0072.severity = silent + +# IDE0073: The file header is missing or not located at the top of the file +dotnet_diagnostic.IDE0073.severity = silent + +# IDE0074: Use compound assignment +dotnet_diagnostic.IDE0074.severity = silent + +# IDE0075: Simplify conditional expression +dotnet_diagnostic.IDE0075.severity = silent + +# IDE0076: Invalid global 'SuppressMessageAttribute' +dotnet_diagnostic.IDE0076.severity = silent + +# IDE0077: Avoid legacy format target in 'SuppressMessageAttribute' +dotnet_diagnostic.IDE0077.severity = silent + +# IDE0078: Use pattern matching +dotnet_diagnostic.IDE0078.severity = silent + +# IDE0079: RemoveUnnecessarySuppression +dotnet_diagnostic.IDE0079.severity = silent + +# IDE0080: Remove unnecessary suppression operator +dotnet_diagnostic.IDE0080.severity = silent + +# IDE0081: RemoveUnnecessaryByVal +dotnet_diagnostic.IDE0081.severity = silent + +# IDE0082: 'typeof' can be converted to 'nameof' +dotnet_diagnostic.IDE0082.severity = silent + +# IDE0083: Use pattern matching +dotnet_diagnostic.IDE0083.severity = silent + +# IDE0084: Use pattern matching (IsNot operator) +dotnet_diagnostic.IDE0084.severity = silent + +# IDE0090: Use 'new(...)' +dotnet_diagnostic.IDE0090.severity = silent + +# IDE0100: Remove redundant equality +dotnet_diagnostic.IDE0100.severity = silent + +# IDE0110: Remove unnecessary discard +dotnet_diagnostic.IDE0110.severity = silent + +# IDE0120: Simplify LINQ expression +dotnet_diagnostic.IDE0120.severity = silent + +# IDE0130: Namespace does not match folder structure +dotnet_diagnostic.IDE0130.severity = silent + +# IDE0140: Simplify object creation +dotnet_diagnostic.IDE0140.severity = silent + +# IDE0150: Prefer 'null' check over type check +dotnet_diagnostic.IDE0150.severity = silent + +# IDE0160: Convert to block scoped namespace +dotnet_diagnostic.IDE0160.severity = silent + +# IDE0161: Convert to file-scoped namespace +dotnet_diagnostic.IDE0161.severity = silent + +# IDE1005: Delegate invocation can be simplified. +dotnet_diagnostic.IDE1005.severity = silent + +# IDE1006: Naming Styles +dotnet_diagnostic.IDE1006.severity = silent + +# IDE2000: C# +dotnet_diagnostic.IDE2000.severity = silent + +# IDE2001: Embedded statements must be on their own line +dotnet_diagnostic.IDE2001.severity = silent + +# IDE2002: Consecutive braces must not have blank line between them +dotnet_diagnostic.IDE2002.severity = silent + +# IDE2003: C# +dotnet_diagnostic.IDE2003.severity = silent + +# IDE2004: Blank line not allowed after constructor initializer colon +dotnet_diagnostic.IDE2004.severity = silent + +# xUnit1000: Test classes must be public +dotnet_diagnostic.xUnit1000.severity = warning + +# xUnit1001: Fact methods cannot have parameters +dotnet_diagnostic.xUnit1001.severity = warning + +# xUnit1002: Test methods cannot have multiple Fact or Theory attributes +dotnet_diagnostic.xUnit1002.severity = warning + +# xUnit1003: Theory methods must have test data +dotnet_diagnostic.xUnit1003.severity = warning + +# xUnit1004: Test methods should not be skipped +dotnet_diagnostic.xUnit1004.severity = warning + +# xUnit1005: Fact methods should not have test data +dotnet_diagnostic.xUnit1005.severity = warning + +# xUnit1006: Theory methods should have parameters +dotnet_diagnostic.xUnit1006.severity = warning + +# xUnit1007: ClassData must point at a valid class +dotnet_diagnostic.xUnit1007.severity = warning + +# xUnit1008: Test data attribute should only be used on a Theory +dotnet_diagnostic.xUnit1008.severity = warning + +# xUnit1009: InlineData must match the number of method parameters +dotnet_diagnostic.xUnit1009.severity = warning + +# xUnit1010: The value is not convertible to the method parameter type +dotnet_diagnostic.xUnit1010.severity = warning + +# xUnit1011: There is no matching method parameter +dotnet_diagnostic.xUnit1011.severity = warning + +# xUnit1012: Null should not be used for value type parameters +dotnet_diagnostic.xUnit1012.severity = warning + +# xUnit1013: Public methods should be marked as test +dotnet_diagnostic.xUnit1013.severity = warning + +# xUnit1014: MemberData should use nameof operator for member name +dotnet_diagnostic.xUnit1014.severity = warning + +# xUnit1015: MemberData must reference an existing member +dotnet_diagnostic.xUnit1015.severity = warning + +# xUnit1016: MemberData must reference a public member +dotnet_diagnostic.xUnit1016.severity = warning + +# xUnit1017: MemberData must reference a static member +dotnet_diagnostic.xUnit1017.severity = warning + +# xUnit1018: MemberData must reference a valid member kind +dotnet_diagnostic.xUnit1018.severity = warning + +# xUnit1019: MemberData must reference a member providing a valid data type +dotnet_diagnostic.xUnit1019.severity = warning + +# xUnit1020: MemberData must reference a property with a getter +dotnet_diagnostic.xUnit1020.severity = warning + +# xUnit1021: MemberData should not have parameters if the referenced member is not a method +dotnet_diagnostic.xUnit1021.severity = warning + +# xUnit1022: Theory methods cannot have a parameter array +dotnet_diagnostic.xUnit1022.severity = warning + +# xUnit1023: Theory methods cannot have default parameter values +dotnet_diagnostic.xUnit1023.severity = warning + +# xUnit1024: Test methods cannot have overloads +dotnet_diagnostic.xUnit1024.severity = warning + +# xUnit1025: InlineData should be unique within the Theory it belongs to +dotnet_diagnostic.xUnit1025.severity = warning + +# xUnit1026: Theory methods should use all of their parameters +dotnet_diagnostic.xUnit1026.severity = warning + +# xUnit2000: Constants and literals should be the expected argument +dotnet_diagnostic.xUnit2000.severity = warning + +# xUnit2001: Do not use invalid equality check +dotnet_diagnostic.xUnit2001.severity = warning + +# xUnit2002: Do not use null check on value type +dotnet_diagnostic.xUnit2002.severity = warning + +# xUnit2003: Do not use equality check to test for null value +dotnet_diagnostic.xUnit2003.severity = warning + +# xUnit2004: Do not use equality check to test for boolean conditions +dotnet_diagnostic.xUnit2004.severity = warning + +# xUnit2005: Do not use identity check on value type +dotnet_diagnostic.xUnit2005.severity = warning + +# xUnit2006: Do not use invalid string equality check +dotnet_diagnostic.xUnit2006.severity = warning + +# xUnit2007: Do not use typeof expression to check the type +dotnet_diagnostic.xUnit2007.severity = warning + +# xUnit2008: Do not use boolean check to match on regular expressions +dotnet_diagnostic.xUnit2008.severity = warning + +# xUnit2009: Do not use boolean check to check for substrings +dotnet_diagnostic.xUnit2009.severity = warning + +# xUnit2010: Do not use boolean check to check for string equality +dotnet_diagnostic.xUnit2010.severity = warning + +# xUnit2011: Do not use empty collection check +dotnet_diagnostic.xUnit2011.severity = warning + +# xUnit2012: Do not use Enumerable.Any() to check if a value exists in a collection +dotnet_diagnostic.xUnit2012.severity = warning + +# xUnit2013: Do not use equality check to check for collection size. +dotnet_diagnostic.xUnit2013.severity = none + +# xUnit2014: Do not use throws check to check for asynchronously thrown exception +dotnet_diagnostic.xUnit2014.severity = none + +# xUnit2015: Do not use typeof expression to check the exception type +dotnet_diagnostic.xUnit2015.severity = warning + +# xUnit2016: Keep precision in the allowed range when asserting equality of doubles or decimals +dotnet_diagnostic.xUnit2016.severity = warning + +# xUnit2017: Do not use Contains() to check if a value exists in a collection +dotnet_diagnostic.xUnit2017.severity = none + +# xUnit2018: Do not compare an object's exact type to an abstract class or interface +dotnet_diagnostic.xUnit2018.severity = warning + +# xUnit2019: Do not use obsolete throws check to check for asynchronously thrown exception +dotnet_diagnostic.xUnit2019.severity = warning + +# xUnit3000: Test case classes must derive directly or indirectly from Xunit.LongLivedMarshalByRefObject +dotnet_diagnostic.xUnit3000.severity = warning + +# xUnit3001: Classes that implement Xunit.Abstractions.IXunitSerializable must have a public parameterless constructor +dotnet_diagnostic.xUnit3001.severity = warning + +# !!! +dotnet_diagnostic.xUnit3001.severity = warning + +# !!! OVERRIDES +# !!! Note: It is preferred to minimize the overrides if possible (just to follow the MS dotnet convention as much as possible) diff --git a/lang/csharp/README.md b/lang/csharp/README.md index 39dee79bb0c..6a7c7283b21 100644 --- a/lang/csharp/README.md +++ b/lang/csharp/README.md @@ -1,4 +1,4 @@ -# Avro C# [![Build Status](https://travis-ci.org/apache/avro.svg?branch=master)](https://travis-ci.org/apache/avro) [![NuGet Package](https://img.shields.io/nuget/v/Apache.Avro.svg)](https://www.nuget.org/packages/Apache.Avro) +# Avro C# [![Test C#](https://github.com/apache/avro/actions/workflows/test-lang-csharp.yml/badge.svg)](https://github.com/apache/avro/actions/workflows/test-lang-csharp.yml) [![NuGet Package](https://img.shields.io/nuget/v/Apache.Avro.svg)](https://www.nuget.org/packages/Apache.Avro) [![Avro](https://avro.apache.org/images/avro-logo.png)](http://avro.apache.org/) @@ -12,9 +12,26 @@ Install-Package Apache.Avro ## Build & Test -1. Install [.NET SDK 5.0+](https://dotnet.microsoft.com/download/dotnet-core) +1. Install [.NET SDK 8.0+](https://dotnet.microsoft.com/download/dotnet-core) 2. `dotnet test` +## Project Target Frameworks + +| Project | Published to nuget.org | Type | .NET Standard 2.0 | .NET Standard 2.1 | .NET 6.0 | .NET 7.0 | .NET 8.0 | +|:-------------------:|:--------------------------:|:----------:|:------------------:|:-----------------:|:---------:|:---------:|:---------:| +| Avro.main | Apache.Avro | Library | âœ”ī¸ | âœ”ī¸ | | | | +| Avro.File.Snappy | Apache.Avro.File.Snappy | Library | âœ”ī¸ | âœ”ī¸ | | | | +| Avro.File.BZip2 | Apache.Avro.File.BZip2 | Library | âœ”ī¸ | âœ”ī¸ | | | | +| Avro.File.XZ | Apache.Avro.File.XZ | Library | âœ”ī¸ | âœ”ī¸ | | | | +| Avro.File.Zstandard | Apache.Avro.File.Zstandard | Library | âœ”ī¸ | âœ”ī¸ | | | | +| Avro.codegen | Apache.Avro.Tools | Exe | | |âœ”ī¸ |âœ”ī¸ |âœ”ī¸ | +| Avro.ipc | | Library | âœ”ī¸ | âœ”ī¸ | | | | +| Avro.ipc.test | | Unit Tests | | |âœ”ī¸ |âœ”ī¸ |âœ”ī¸ | +| Avro.msbuild | | Library | âœ”ī¸ | âœ”ī¸ | | | | +| Avro.perf | | Exe | | |âœ”ī¸ |âœ”ī¸ |âœ”ī¸ | +| Avro.test | | Unit Tests | | |âœ”ī¸ |âœ”ī¸ |âœ”ī¸ | +| Avro.benchmark | | Exe | | |âœ”ī¸ |âœ”ī¸ |âœ”ī¸ | + ## Dependency package version strategy 1. Use [`versions.props`](./versions.props) to specify package versions. `PackageReference` elements in `.csproj` files should use only version properties defined in [`versions.props`](./versions.props). @@ -25,3 +42,7 @@ In short, we should only update the version of the dependencies in our libraries ## Notes The [LICENSE](./LICENSE) and [NOTICE](./NOTICE) files in the lang/csharp source directory are used to build the binary distribution. The [LICENSE.txt](../../LICENSE.txt) and [NOTICE.txt](../../NOTICE.txt) information for the Avro C# source distribution is in the root directory. + +## Styling Guidelines + +Can be found in [STYLING](./STYLING.MD). diff --git a/lang/csharp/STYLING.md b/lang/csharp/STYLING.md new file mode 100644 index 00000000000..948eddaba8e --- /dev/null +++ b/lang/csharp/STYLING.md @@ -0,0 +1,1595 @@ + +# C# Styling Rules for Apache.Avro + +The following rules are currently used within the .editorconfig of the Avro solution. Any changes to this documentation should be reflected in the .editorconfig file and vice versa. + +Notes + - The examples shown are based on the current settings in .editorconfig + - :exclamation: Not defined :exclamation: means we have not set a preference + - There are cases where it is not explicitly defined in the .editorconfig, but there is a default option + - The project currently targets a framework that uses C# 7.3 + - When violating a formatting rule it may show up as an IDE0055 Fix formatting violation. + +## New line preferences + +### csharp_new_line_before_open_brace +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_open_brace) + +This rule concerns whether an open brace { should be placed on the same line as the preceding code, or on a new line. + +**Example** +``` +void MyMethod() +{ + if (...) + { + ... + } +} +``` +--- +### csharp_new_line_before_else +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_else) + +**Example** +``` +if (...) { + ... +} +else { + ... +} +``` +--- +### csharp_new_line_before_catch +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_catch) + +**Example** +``` +try { + ... +} +catch (Exception e) { + ... +} +``` +--- +### csharp_new_line_before_finally +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_finally) + +**Example** +``` +try { + ... +} +catch (Exception e) { + ... +} +finally { + ... +} +``` +--- +### csharp_new_line_before_members_in_object_initializers +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_members_in_object_initializers) + +**Example** +``` +var z = new B() +{ + A = 3, + B = 4 +} +``` +--- +### csharp_new_line_before_members_in_anonymous_types +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_members_in_anonymous_types) + +**Example** +``` +var z = new +{ + A = 3, + B = 4 +} +``` +--- +### csharp_new_line_between_query_expression_clauses +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_between_query_expression_clauses) + +**Example** +``` +var q = from a in e + from b in e + select a * b; +``` +--- + +## Indentation preferences + +### csharp_indent_case_contents +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_case_contents) + +**Example** +``` +switch(c) { + case Color.Red: + Console.WriteLine("The color is red"); + break; + case Color.Blue: + Console.WriteLine("The color is blue"); + break; + default: + Console.WriteLine("The color is unknown."); + break; +} +``` +--- +### csharp_indent_switch_labels +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_switch_labels) + +**Example** +``` +switch(c) { + case Color.Red: + Console.WriteLine("The color is red"); + break; + case Color.Blue: + Console.WriteLine("The color is blue"); + break; + default: + Console.WriteLine("The color is unknown."); + break; +} +``` +--- +### csharp_indent_labels +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_labels) + +Labels are placed at one less indent to the current context + +**Example** +``` +class C +{ + private string MyMethod(...) + { + if (...) { + goto error; + } + error: + throw new Exception(...); + } +} +``` +--- +### csharp_indent_block_contents +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_block_contents) + +**Example** +``` +static void Hello() +{ + Console.WriteLine("Hello"); +} +``` +--- +### csharp_indent_braces +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_braces) + +**Example** +``` +static void Hello() +{ + Console.WriteLine("Hello"); +} +``` +--- +### csharp_indent_case_contents_when_block +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_case_contents_when_block) + +**Example** +``` +case 0: + { + Console.WriteLine("Hello"); + break; + } +``` +--- + +## Spacing Preferences + +### csharp_space_after_cast +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_space_after_cast) + +**Example** +``` +int y = (int)x; +``` +--- +### csharp_space_after_keywords_in_control_flow_statements +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_space_after_keywords_in_control_flow_statements) + +**Example** +``` +for (int i;i>, &, ^, |) precedence + +Default is always_for_clarity + +**Example** +``` +var v = a + (b * c); +``` +--- +### dotnet_style_parentheses_in_relational_binary_operators +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0047-ide0048#dotnet_style_parentheses_in_relational_binary_operators) + +Prefer parentheses to clarify relational operator (>, <, <=, >=, is, as, ==, !=) precedence + +Default is always_for_clarity + +**Example** +``` +var v = (a < b) == (c > d); +``` +--- +### dotnet_style_parentheses_in_other_binary_operators +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0047-ide0048#dotnet_style_parentheses_in_other_binary_operators) + +Prefer parentheses to clarify other binary operator (&&, ||, ??) precedence + +Default is always_for_clarity + +**Example** +``` +var v = a || (b && c); +``` +--- +### dotnet_style_parentheses_in_other_operators +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0047-ide0048#dotnet_style_parentheses_in_other_operators) + +Prefer to not have parentheses when operator precedence is obvious + +Default is never_if_unnecessary + +**Example** +``` +var v = a.b.Length; +``` +--- + +## Expression-level preferences + +### dotnet_style_object_initializer +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0017#dotnet_style_object_initializer) + +Prefer objects to be initialized using object initializers when possible + +default is true + +**Example** +``` +var c = new Customer() { Age = 21 }; +``` +--- +### csharp_style_inlined_variable_declaration +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0018#csharp_style_inlined_variable_declaration) + +Prefer out variables to be declared inline in the argument list of a method call when possible + +**Example** +``` +if (int.TryParse(value, out int i) {...} +``` +--- +### dotnet_style_collection_initializer +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0028#dotnet_style_collection_initializer) + +Prefer collections to be initialized using collection initializers when possible + +**Example** +``` +var list = new List { 1, 2, 3 }; +``` +--- +### dotnet_style_prefer_auto_properties +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0032#dotnet_style_prefer_auto_properties) + +Prefer auto properties over properties with private backing fields + +**Example** +``` +private int Age { get; } +``` +--- +### dotnet_style_explicit_tuple_names +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0033#dotnet_style_explicit_tuple_names) + +Prefer tuple names to ItemX properties + +**Example** +``` +(string name, int age) customer = GetCustomer(); +var name = customer.name; +``` +--- +### csharp_prefer_simple_default_expression +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0034#csharp_prefer_simple_default_expression) + +Prefer default over default(T) + +**Example** +``` +void DoWork(CancellationToken cancellationToken = default) { ... } +``` +--- +### dotnet_style_prefer_inferred_tuple_names +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0037#dotnet_style_prefer_inferred_tuple_names) + +Prefer inferred tuple element names + +**Example** +``` +var tuple = (age, name); +``` +--- +### dotnet_style_prefer_inferred_anonymous_type_member_names +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0037#dotnet_style_prefer_inferred_anonymous_type_member_names) + +Prefer inferred anonymous type member names + +**Example** +``` +var anon = new { age, name }; +``` +--- +### csharp_style_pattern_local_over_anonymous_function +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0039#csharp_style_pattern_local_over_anonymous_function) + +Prefer anonymous functions over local functions + +**Example** +``` +Func fibonacci = null; +fibonacci = (int n) => +{ + return n <= 1 ? 1 : fibonacci(n - 1) + fibonacci(n - 2); +}; +``` +--- +### csharp_style_deconstructed_variable_declaration +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0042#csharp_style_deconstructed_variable_declaration) + +Prefer deconstructed variable declaration + +default is true + +**Example** +``` +var (name, age) = GetPersonTuple(); +Console.WriteLine($"{name} {age}"); + +(int x, int y) = GetPointTuple(); +Console.WriteLine($"{x} {y}"); +``` +--- +### dotnet_style_prefer_conditional_expression_over_assignment +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0045#dotnet_style_prefer_conditional_expression_over_assignment) + +Prefer assignments with a ternary conditional over an if-else statement + +**Example** +``` +string s = expr ? "hello" : "world"; +``` +--- +### dotnet_style_prefer_conditional_expression_over_return +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0046#dotnet_style_prefer_conditional_expression_over_return) + +Prefer return statements to use a ternary conditional over an if-else statement + +**Example** +``` +return expr ? "hello" : "world" +``` +--- +### dotnet_style_prefer_compound_assignment +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0054-ide0074#dotnet_style_prefer_compound_assignment) + +Prefer compound assignment expressions + +default is true + +**Example** +``` +x += 1; +``` +--- +### dotnet_style_prefer_simplified_boolean_expressions +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0075#dotnet_style_prefer_simplified_boolean_expressions) + +Prefer simplified conditional expressions + +default is true + +**Example** +``` +var result1 = M1() && M2(); +var result2 = M1() || M2(); +``` +--- +### csharp_style_implicit_object_creation_when_type_is_apparent +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0090#csharp_style_implicit_object_creation_when_type_is_apparent) + +Prefer target-typed new expressions when created type is apparent + +default is true + +**Example** +``` +C c = new(); +C c2 = new() { Field = 0 }; +``` +--- + +## Null-checking Preferences + +### csharp_style_throw_expression +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0016#csharp_style_throw_expression) + +Prefer to use throw expressions instead of throw statements + +**Example** +``` +_s = s ?? throw new ArgumentNullException(nameof(s)); +``` +--- +### dotnet_style_coalesce_expression +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0029-ide0030#dotnet_style_coalesce_expression) + +Prefer null coalescing expressions to ternary operator checking + +**Example** +``` +var v = x ?? y; +``` +--- +### dotnet_style_coalesce_expression +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0031#dotnet_style_null_propagation) + +Prefer to use null-conditional operator when possible + +**Example** +``` +string v = o?.ToString(); +``` +--- +### dotnet_style_prefer_is_null_check_over_reference_equality_method +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0041#dotnet_style_prefer_is_null_check_over_reference_equality_method) + +Prefer is null check over reference equality method + +**Example** +``` +if (value is null) + return; +``` +--- +### csharp_style_conditional_delegate_call +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide1005#csharp_style_conditional_delegate_call) + +Prefer to use the conditional coalescing operator (?.) when invoking a lambda expression, instead of performing a null check + +**Example** +``` +func?.Invoke(args); +``` +--- + +## var Preferences + +### csharp_style_var_for_built_in_types +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0007-ide0008#csharp_style_var_for_built_in_types) + +Prefer explicit type over var to declare variables with built-in system types such as int + +**Example** +``` +int x = 5; +``` +--- +### csharp_style_var_when_type_is_apparent +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0007-ide0008#csharp_style_var_when_type_is_apparent) + +Prefer explicit type over var when the type is already mentioned on the right-hand side of a declaration expression + +**Example** +``` +Customer obj = new Customer(); +``` +--- +### csharp_style_var_elsewhere +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0007-ide0008#csharp_style_var_elsewhere) + +Prefer explicit type over var in all cases, unless overridden by another code style rule + +**Example** +``` +bool f = this.Init(); +``` +--- + +## Expression-bodied member Preferences + +### csharp_style_expression_bodied_constructors +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0021#csharp_style_expression_bodied_constructors) + +Prefer expression bodies for constructors + +**Example** +``` +public Customer(int age) => Age = age; +``` +--- +### csharp_style_expression_bodied_methods +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0022#csharp_style_expression_bodied_methods) + +Prefer expression bodies for methods + +**Example** +``` +public int GetAge() => this.Age; +``` +--- +### csharp_style_expression_bodied_operators +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0023-ide0024#csharp_style_expression_bodied_operators) + +Prefer expression bodies for operators + +**Example** +``` +public static ComplexNumber operator + (ComplexNumber c1, ComplexNumber c2) + => new ComplexNumber(c1.Real + c2.Real, c1.Imaginary + c2.Imaginary); +``` +--- +### csharp_style_expression_bodied_properties +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0025#csharp_style_expression_bodied_properties) + +Prefer expression bodies for properties + +**Example** +``` +public int Age => _age; +``` +--- +### csharp_style_expression_bodied_indexers +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0026#csharp_style_expression_bodied_indexers) + +Prefer expression bodies for indexers + +**Example** +``` +public T this[int i] => _values[i]; +``` +--- +### csharp_style_expression_bodied_accessors +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0027#csharp_style_expression_bodied_accessors) + +Prefer expression bodies for accessors + +**Example** +``` +public int Age { get => _age; set => _age = value; } +``` +--- +### csharp_style_expression_bodied_lambdas +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0053#csharp_style_expression_bodied_lambdas) + +Prefer expression bodies for lambdas + +**Example** +``` +Func square = x => x * x; +``` +--- +### csharp_style_expression_bodied_local_functions +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0061#csharp_style_expression_bodied_local_functions) + +Prefer expression bodies for local functions + +**Example** +``` +void M() +{ + Hello(); + void Hello() => Console.WriteLine("Hello"); +} +``` +--- + +## Pattern matching Preferences + +### csharp_style_pattern_matching_over_as_with_null_check +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0019#csharp_style_pattern_matching_over_as_with_null_check) + +Prefer pattern matching instead of as expressions with null checks to determine if something is of a particular type + +**Example** +``` +if (o is string s) {...} +``` +--- +### csharp_style_pattern_matching_over_is_with_cast_check +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0020-ide0038#csharp_style_pattern_matching_over_is_with_cast_check) + +Prefer pattern matching instead of is expressions with type casts + +**Example** +``` +if (o is int i) {...} +``` +--- +### csharp_style_prefer_switch_expression +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0066#csharp_style_prefer_switch_expression) + +Prefer to use a switch expression (introduced with C# 8.0) + +**Example** +``` +return x switch +{ + 1 => 1 * 1, + 2 => 2 * 2, + _ => 0, +}; +``` +--- +### csharp_style_prefer_not_pattern +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0083#csharp_style_prefer_not_pattern) + +Prefer to use 'not' pattern, when possible (introduced with C# 9.0) + +Default is true + +**Example** +``` +var y = o is not C c; +``` +--- + +## Code block Prerferences + +### csharp_prefer_braces +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0011#csharp_prefer_braces) + +Prefer curly braces even for one line of code + +**Example** +``` +if (test) { this.Display(); } +``` +--- +### csharp_prefer_simple_using_statement +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0063#csharp_prefer_simple_using_statement) + +Don't prefer to use a simple using statement + +**Example** +``` +using (var a = b) { } +``` +--- + +## File Header Preferences + +### file_header_template +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0073#file_header_template) + +unset or empty string - Do not require file header. + +Default is unset + +**Example** +``` +namespace N2 +{ + class C2 { } +} +``` +--- + +## Naming Rules + +### Non-private static fields are PascalCase +**Example** +``` +public static MyString = "value"; +protected static MyString = "value"; +internal static MyString = "value"; +protected_internal static MyString = "value"; +private_protected static MyString = "value";; +``` +--- +### Constants are PascalCase +**Example** +``` +public const string MyConstant = "value"; +``` +--- +### Static fields are camelCase and start with s_ +**Example** +``` +private static int s_myInt; +``` +--- +# Instance fields are camelCase and start with _ +**Example** +``` +private int _myInt; + +internal string _myString; +``` +--- +# Locals and parameters are camelCase +**Example** +``` +private static string GetText(string path, string filename) +{ + var reader = File.OpenText($"{AppendPathSeparator(path)}{filename}"); + var text = reader.ReadToEnd(); + return text; + + string AppendPathSeparator(string filepath) + { + return filepath.EndsWith(@"\") ? filepath : filepath + @"\"; + } +} +``` +--- +# Local functions are PascalCase +**Example** +``` +private static string GetText(string path, string filename) +{ + var reader = File.OpenText($"{AppendPathSeparator(path)}{filename}"); + var text = reader.ReadToEnd(); + return text; + + string AppendPathSeparator(string filepath) + { + return filepath.EndsWith(@"\") ? filepath : filepath + @"\"; + } +} +``` +--- +# By default, name items with PascalCase +**Example** +``` +public void MyMethod() { }; +``` +--- + +## Formatting Rules + +### dotnet_sort_system_directives_first +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#dotnet_sort_system_directives_first) + +Sort System.* using directives alphabetically, and place them before other using directives. + +**Example** +``` +using System.Collections.Generic; +using System.Threading.Tasks; +using Avro; +``` +--- +### dotnet_separate_import_directive_groups +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#dotnet_separate_import_directive_groups) + +:exclamation: Not defined :exclamation: + +**Example** +``` +// dotnet_separate_import_directive_groups = true +using System.Collections.Generic; +using System.Threading.Tasks; + +using Avro; + +// dotnet_separate_import_directive_groups = false +using System.Collections.Generic; +using System.Threading.Tasks; +using Avro; +``` +--- +### dotnet_style_namespace_match_folder +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#dotnet_style_namespace_match_folder) + +:exclamation: Not defined :exclamation: + +**Example** +``` +// dotnet_style_namespace_match_folder = true +// file path: Example/Convention/C.cs +using System; + +namespace Example.Convention +{ + class C + { + } +} + +// dotnet_style_namespace_match_folder = false +// file path: Example/Convention/C.cs +using System; + +namespace Example +{ + class C + { + } +} +``` +--- + +## Unnecessary Code Rules + +### Simplify name (IDE0001) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0001) +**Example** +``` +using System.IO; +class C +{ + // IDE0001: 'System.IO.FileInfo' can be simplified to 'FileInfo' + System.IO.FileInfo file; + + // Fixed code + FileInfo file; +} +``` +--- +### Simplify member access (IDE0002) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0002) +**Example** +``` +static void M1() { } +static void M2() +{ + // IDE0002: 'C.M1' can be simplified to 'M1' + C.M1(); + + // Fixed code + M1(); +} +``` +--- +### Remove unnecessary cast (IDE0004) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0004) +**Example** +``` +// Code with violations +int v = (int)0; + +// Fixed code +int v = 0; +``` +--- +### Remove unnecessary import (IDE0005) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0005) +**Example** +``` +// Code with violations +using System; +using System.IO; // IDE0005: Using directive is unnecessary +class C +{ + public static void M() + { + Console.WriteLine("Hello"); + } +} + +// Fixed code +using System; +class C +{ + public static void M() + { + Console.WriteLine("Hello"); + } +} +``` +--- +### Remove unreachable code (IDE0035) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0035) +**Example** +``` +// Code with violations +void M() +{ + throw new System.Exception(); + + // IDE0035: Remove unreachable code + int v = 0; +} + +// Fixed code +void M() +{ + throw new System.Exception(); +} +``` +--- +### Remove unused private member (IDE0051) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0051) +**Example** +``` +// Code with violations +class C +{ + // IDE0051: Remove unused private members + private readonly int _fieldPrivate; + private int PropertyPrivate => 1; + private int GetNumPrivate() => 1; + + // No IDE0051 + internal readonly int FieldInternal; + private readonly int _fieldPrivateUsed; + public int PropertyPublic => _fieldPrivateUsed; + private int GetNumPrivateUsed() => 1; + internal int GetNumInternal() => GetNumPrivateUsed(); + public int GetNumPublic() => GetNumPrivateUsed(); +} + +// Fixed code +class C +{ + // No IDE0051 + internal readonly int FieldInternal; + private readonly int _fieldPrivateUsed; + public int PropertyPublic => _fieldPrivateUsed; + private int GetNumPrivateUsed() => 1; + internal int GetNumInternal() => GetNumPrivateUsed(); + public int GetNumPublic() => GetNumPrivateUsed(); +} +``` +--- +### Remove unread private member (IDE0052) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0052) +**Example** +``` +class C +{ + // IDE0052: Remove unread private members + private readonly int _field1; + private int _field2; + private int Property { get; set; } + + public C() + { + _field1 = 0; + } + + public void SetMethod() + { + _field2 = 0; + Property = 0; + } +} + +// Fixed code +class C +{ + public C() + { + } + + public void SetMethod() + { + } +} +``` +--- +### csharp_style_unused_value_expression_statement_preference +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0058#csharp_style_unused_value_expression_statement_preference) + +Prefer to assign an unused expression to a discard + +Default is discard_variable + +**Example** +``` +_ = System.Convert.ToInt32("35"); +``` +--- +### csharp_style_unused_value_assignment_preference +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0059#csharp_style_unused_value_assignment_preference) + +Prefer to use a discard when assigning a value that's not used + +Default is discard_variable + +**Example** +``` +int GetCount(Dictionary wordCount, string searchWord) +{ + _ = wordCount.TryGetValue(searchWord, out var count); + return count; +} +``` +--- +### dotnet_code_quality_unused_parameters +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0060#dotnet_code_quality_unused_parameters) + +Flag methods with any accessibility that contain unused parameters + +Default is all + +**Example** +``` +public int GetNum1(int unusedParam) { return 1; } +internal int GetNum2(int unusedParam) { return 1; } +private int GetNum3(int unusedParam) { return 1; } +``` +--- +### dotnet_remove_unnecessary_suppression_exclusions +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0079#dotnet_remove_unnecessary_suppression_exclusions) + + enables the rule for all rule IDs and rule categories + +Default is none + +**Example** +``` +using System.Diagnostics.CodeAnalysis; + +class C1 +{ + // 'dotnet_remove_unnecessary_suppression_exclusions = IDE0051' + + // Unnecessary pragma suppression, but not flagged by IDE0079 +#pragma warning disable IDE0051 // IDE0051: Remove unused member + private int UsedMethod() => 0; +#pragma warning restore IDE0051 + + public int PublicMethod() => UsedMethod(); +} +``` +--- +### Remove unnecessary suppression operator (IDE0080) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0080) +**Example** +``` +// Code with violations +if (o !is string) { } + +// Potential fixes: +// 1. +if (o is not string) { } + +// 2. +if (!(o is string)) { } + +// 3. +if (o is string) { } +``` +--- +### Remove unnecessary equality operator (IDE0100) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0100) +**Example** +``` +// Code with violations +if (x == true) { } +if (M() != false) { } + +// Fixed code +if (x) { } +if (M()) { } +``` +--- +### Remove unnecessary discard (IDE0110) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0110) +**Example** +``` +// Code with violations +switch (o) +{ + case int _: + Console.WriteLine("Value was an int"); + break; + case string _: + Console.WriteLine("Value was a string"); + break; +} + +// Fixed code +switch (o) +{ + case int: + Console.WriteLine("Value was an int"); + break; + case string: + Console.WriteLine("Value was a string"); + break; +} +``` +--- + +## Miscellaneous Rules + +### Remove invalid global 'SuppressMessageAttribute' (IDE0076) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0076) +**Example** +``` +// IDE0076: Invalid target '~F:N.C.F2' - no matching field named 'F2' +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "member", Target = "~F:N.C.F2")] +// IDE0076: Invalid scope 'property' +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "property", Target = "~P:N.C.P")] + +// Fixed code +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "member", Target = "~F:N.C.F")] +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "member", Target = "~P:N.C.P")] + +namespace N +{ + class C + { + public int F; + public int P { get; } + } +} +``` +--- +### Avoid legacy format target in global 'SuppressMessageAttribute' (IDE0077) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0077) +**Example** +``` +// IDE0077: Legacy format target 'N.C.#F' +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "member", Target = "N.C.#F")] + +// Fixed code +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "member", Target = "~F:N.C.F")] + +namespace N +{ + class C + { + public int F; + } +} +``` +--- diff --git a/lang/csharp/build.sh b/lang/csharp/build.sh index fefbe28e9ba..2efd4463174 100755 --- a/lang/csharp/build.sh +++ b/lang/csharp/build.sh @@ -35,14 +35,14 @@ do test) dotnet build --configuration Release Avro.sln - # AVRO-2442: Explictly set LANG to work around ICU bug in `dotnet test` - LANG=en_US.UTF-8 dotnet test --configuration Release --no-build \ + # AVRO-2442: Explicitly set LANG to work around ICU bug in `dotnet test` + LANG=en_US.UTF-8 dotnet test --configuration Release --no-build \ --filter "TestCategory!=Interop" Avro.sln ;; perf) pushd ./src/apache/perf/ - dotnet run --configuration Release --framework net5.0 + dotnet run --configuration Release --framework net8.0 ;; dist) @@ -50,18 +50,25 @@ do dotnet pack --configuration Release Avro.sln # add the binary LICENSE and NOTICE to the tarball - mkdir build/ + mkdir -p build/ cp LICENSE NOTICE build/ # add binaries to the tarball - mkdir build/main/ + mkdir -p build/main/ cp -R src/apache/main/bin/Release/* build/main/ - mkdir build/codegen/ + # add codec binaries to the tarball + for codec in Avro.File.Snappy Avro.File.BZip2 Avro.File.XZ Avro.File.Zstandard + do + mkdir -p build/codec/$codec/ + cp -R src/apache/codec/$codec/bin/Release/* build/codec/$codec/ + done + # add codegen binaries to the tarball + mkdir -p build/codegen/ cp -R src/apache/codegen/bin/Release/* build/codegen/ # build the tarball mkdir -p ${ROOT}/dist/csharp - (cd build; tar czf ${ROOT}/../dist/csharp/avro-csharp-${VERSION}.tar.gz main codegen LICENSE NOTICE) + (cd build; tar czf ${ROOT}/../dist/csharp/avro-csharp-${VERSION}.tar.gz main codegen codec LICENSE NOTICE) # build documentation doxygen Avro.dox @@ -70,15 +77,16 @@ do ;; interop-data-generate) - dotnet run --project src/apache/test/Avro.test.csproj --framework net5.0 ../../share/test/schemas/interop.avsc ../../build/interop/data + dotnet run --project src/apache/test/Avro.test.csproj --framework net8.0 ../../share/test/schemas/interop.avsc ../../build/interop/data ;; interop-data-test) - LANG=en_US.UTF-8 dotnet test --filter "TestCategory=Interop" --verbosity normal + LANG=en_US.UTF-8 dotnet test --filter "TestCategory=Interop" --logger "console;verbosity=normal;noprogress=true" src/apache/test/Avro.test.csproj ;; clean) - rm -rf src/apache/{main,test,codegen,ipc,msbuild,perf}/{obj,bin} + rm -rf src/apache/{main,test,codegen,ipc,msbuild,perf,benchmark}/{obj,bin} + rm -rf src/apache/codec/Avro.File.{BZip2,Snappy,XZ,ZStandard}{,.Test}/{obj,bin} rm -rf build rm -f TestResult.xml ;; diff --git a/lang/csharp/common.props b/lang/csharp/common.props index 3bae5bff4a8..f41617eadfa 100644 --- a/lang/csharp/common.props +++ b/lang/csharp/common.props @@ -15,6 +15,8 @@ limitations under the License. --> + + $(MSBuildThisFileDirectory)/../../share/VERSION.txt @@ -33,11 +35,21 @@ $(MajorVersion).$(MinorVersion).$(BuildNumber).0 + + + net6.0;net7.0;net8.0 + + netstandard2.0;netstandard2.1 + + $(DefaultExeTargetFrameworks) + + Copyright Š 2019 The Apache Software Foundation. - avro-logo.png - LICENSE + logo.png + Apache-2.0 + README.md https://avro.apache.org/ Avro;Apache;Serialization;Binary;Json;Schema https://github.com/apache/avro.git @@ -45,7 +57,33 @@ - + + + + + + + false + true + + + + false + true + + true + false + + + + + + + + + + + diff --git a/lang/csharp/src/apache/benchmark/.gitignore b/lang/csharp/src/apache/benchmark/.gitignore new file mode 100644 index 00000000000..43e05771fa4 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/.gitignore @@ -0,0 +1 @@ +BenchmarkDotNet.Artifacts/ \ No newline at end of file diff --git a/lang/csharp/src/apache/benchmark/Avro.benchmark.csproj b/lang/csharp/src/apache/benchmark/Avro.benchmark.csproj new file mode 100644 index 00000000000..b944de3c2d4 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/Avro.benchmark.csproj @@ -0,0 +1,52 @@ + + + + + + $(DefaultExeTargetFrameworks) + Exe + + + AnyCPU + pdbonly + true + true + true + Release + false + + + + + + $(NoWarn);CS8981 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lang/csharp/src/apache/benchmark/Benchmarks.cs b/lang/csharp/src/apache/benchmark/Benchmarks.cs new file mode 100644 index 00000000000..4c7ec73be88 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/Benchmarks.cs @@ -0,0 +1,254 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.Collections.Generic; +using System.IO; +using BenchmarkDotNet.Attributes; +using Avro.Generic; +using Avro.IO; +using Avro.Specific; + +namespace Avro.Benchmark +{ + public class Benchmarks + { + private const int _numberOfRecordsInAvro = 100; + + private readonly string _schemaStrSmall; + private readonly string _schemaStrBig; + + private readonly RecordSchema _schemaSmall; + private readonly RecordSchema _schemaBig; + private readonly RecordSchema _schemaAddress; + + private readonly byte[] _avroGenericSmall; + private readonly byte[] _avroGenericBig; + private readonly byte[] _avroSpecificSmall; + private readonly byte[] _avroSpecificBig; + + public Benchmarks() + { + _schemaStrSmall = System.IO.File.ReadAllText("schema/small.avsc"); + _schemaStrBig = System.IO.File.ReadAllText("schema/big.avsc"); + + _schemaSmall = (RecordSchema)Schema.Parse(_schemaStrSmall); + _schemaBig = (RecordSchema)Schema.Parse(_schemaStrBig); + _schemaAddress = (RecordSchema)_schemaBig["address"].Schema; + + // Create avro for reading benchmarking + _avroGenericSmall = GenericRecordsToAvro(CreateGenericRecordSmall()); + _avroGenericBig = GenericRecordsToAvro(CreateGenericRecordBig()); + + _avroSpecificSmall = SpecificRecordsToAvro(CreateSpecificRecordSmall()); + _avroSpecificBig = SpecificRecordsToAvro(CreateSpecificRecordBig()); + } + + private byte[] GenericRecordsToAvro(GenericRecord record) + { + using (MemoryStream outputStream = new MemoryStream()) + { + GenericDatumWriter writer = new GenericDatumWriter(record.Schema); + BinaryEncoder encoder = new BinaryEncoder(outputStream); + + for (int i = 0; i < _numberOfRecordsInAvro; i++) + { + writer.Write(record, encoder); + } + + encoder.Flush(); + + return outputStream.ToArray(); + } + } + + private IList AvroToGenericRecordsToAvro(byte[] avro, RecordSchema schema) + { + using (MemoryStream inputStream = new MemoryStream(avro)) + { + GenericDatumReader reader = new GenericDatumReader(schema, schema); + BinaryDecoder decoder = new BinaryDecoder(inputStream); + List records = new List(); + + for (int i = 0; i < _numberOfRecordsInAvro; i++) + { + GenericRecord record = reader.Read(null, decoder); + if (record == null) + break; + records.Add(record); + } + + return records; + } + } + + private byte[] SpecificRecordsToAvro(T record) where T : ISpecificRecord + { + using (MemoryStream outputStream = new MemoryStream()) + { + SpecificDatumWriter writer = new SpecificDatumWriter(record.Schema); + BinaryEncoder encoder = new BinaryEncoder(outputStream); + + for (int i = 0; i < _numberOfRecordsInAvro; i++) + { + writer.Write(record, encoder); + } + + encoder.Flush(); + + return outputStream.ToArray(); + } + } + + private IList AvroToSpecificRecords(byte[] avro, RecordSchema schema) where T : ISpecificRecord + { + using (MemoryStream inputStream = new MemoryStream(avro)) + { + SpecificDatumReader reader = new SpecificDatumReader(schema, schema); + BinaryDecoder decoder = new BinaryDecoder(inputStream); + List records = new List(); + + for (int i = 0; i < _numberOfRecordsInAvro; i++) + { + T record = reader.Read(default, decoder); ; + if (record == null) + break; + records.Add(record); + } + + return records; + } + } + + [Benchmark] + public void ParseSchemaSmall() + { + Schema.Parse(_schemaStrSmall); + } + + [Benchmark] + public void ParseSchemaBig() + { + Schema.Parse(_schemaStrBig); + } + + [Benchmark] + public GenericRecord CreateGenericRecordSmall() + { + GenericRecord record = new GenericRecord(_schemaSmall); + record.Add("field", "foo"); + + return record; + } + + [Benchmark] + public GenericRecord CreateGenericRecordBig() + { + GenericRecord address = new GenericRecord(_schemaAddress); + address.Add("street", "street"); + address.Add("city", "city"); + address.Add("state_prov", "state_prov"); + address.Add("country", "country"); + address.Add("zip", "zip"); + + GenericRecord record = new GenericRecord(_schemaBig); + record.Add("username", "username"); + record.Add("age", 10); + record.Add("phone", "000000000"); + record.Add("housenum", "0000"); + record.Add("address", address); + + return record; + } + + [Benchmark] + public ISpecificRecord CreateSpecificRecordSmall() + { + return new org.apache.avro.benchmark.small.test() + { + field = "foo" + }; + } + + [Benchmark] + public ISpecificRecord CreateSpecificRecordBig() + { + return new org.apache.avro.benchmark.big.userInfo() + { + username = "username", + age = 10, + phone = "000000000", + housenum = "0000", + address = new org.apache.avro.benchmark.big.mailing_address() + { + street = "street", + city = "city", + state_prov = "state_prov", + country = "country", + zip = "zip" + } + }; + } + + [Benchmark] + public void GenericRecordsToAvroSmall() + { + GenericRecordsToAvro(CreateGenericRecordSmall()); + } + + [Benchmark] + public void GenericRecordsToAvroBig() + { + GenericRecordsToAvro(CreateGenericRecordBig()); + } + + [Benchmark] + public void AvroToGenericRecordsSmall() + { + AvroToGenericRecordsToAvro(_avroGenericSmall, _schemaSmall); + } + + [Benchmark] + public void AvroToGenericRecordsBig() + { + AvroToGenericRecordsToAvro(_avroGenericBig, _schemaBig); + } + + [Benchmark] + public void SpecificRecordsToAvroSmall() + { + SpecificRecordsToAvro(CreateSpecificRecordSmall()); + } + + [Benchmark] + public void SpecificRecordsToAvroBig() + { + SpecificRecordsToAvro(CreateSpecificRecordBig()); + } + + [Benchmark] + public void AvroToSpecificRecordsSmall() + { + AvroToSpecificRecords(_avroSpecificSmall, _schemaSmall); + } + + [Benchmark] + public void AvroToSpecificRecordsBig() + { + AvroToSpecificRecords(_avroSpecificBig, _schemaBig); + } + } +} diff --git a/lang/csharp/src/apache/benchmark/Program.cs b/lang/csharp/src/apache/benchmark/Program.cs new file mode 100644 index 00000000000..c29d51f94db --- /dev/null +++ b/lang/csharp/src/apache/benchmark/Program.cs @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using BenchmarkDotNet.Running; + +namespace Avro.Benchmark +{ + public class Program + { + // dotnet run -c Release -f net8.0 + // dotnet run -c Release -f net8.0 --runtimes net6.0 net7.0 net8.0 + public static void Main(string[] args) + { + BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args); + } + } +} diff --git a/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/mailing_address.cs b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/mailing_address.cs new file mode 100644 index 00000000000..10f003deb54 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/mailing_address.cs @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// ------------------------------------------------------------------------------ +// +// Generated by avrogen, version 1.11.0.0 +// Changes to this file may cause incorrect behavior and will be lost if code +// is regenerated +// +// ------------------------------------------------------------------------------ +namespace org.apache.avro.benchmark.big +{ + using System; + using System.Collections.Generic; + using System.Text; + using Avro; + using Avro.Specific; + + public partial class mailing_address : ISpecificRecord + { + public static Schema _SCHEMA = Avro.Schema.Parse(@"{""type"":""record"",""name"":""mailing_address"",""namespace"":""org.apache.avro.benchmark.big"",""fields"":[{""name"":""street"",""default"":""NONE"",""type"":""string""},{""name"":""city"",""default"":""NONE"",""type"":""string""},{""name"":""state_prov"",""default"":""NONE"",""type"":""string""},{""name"":""country"",""default"":""NONE"",""type"":""string""},{""name"":""zip"",""default"":""NONE"",""type"":""string""}]}"); + private string _street; + private string _city; + private string _state_prov; + private string _country; + private string _zip; + public virtual Schema Schema + { + get + { + return mailing_address._SCHEMA; + } + } + public string street + { + get + { + return this._street; + } + set + { + this._street = value; + } + } + public string city + { + get + { + return this._city; + } + set + { + this._city = value; + } + } + public string state_prov + { + get + { + return this._state_prov; + } + set + { + this._state_prov = value; + } + } + public string country + { + get + { + return this._country; + } + set + { + this._country = value; + } + } + public string zip + { + get + { + return this._zip; + } + set + { + this._zip = value; + } + } + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.street; + case 1: return this.city; + case 2: return this.state_prov; + case 3: return this.country; + case 4: return this.zip; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + }; + } + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: this.street = (System.String)fieldValue; break; + case 1: this.city = (System.String)fieldValue; break; + case 2: this.state_prov = (System.String)fieldValue; break; + case 3: this.country = (System.String)fieldValue; break; + case 4: this.zip = (System.String)fieldValue; break; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + }; + } + } +} diff --git a/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/userInfo.cs b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/userInfo.cs new file mode 100644 index 00000000000..4ddbe0ac071 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/userInfo.cs @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// ------------------------------------------------------------------------------ +// +// Generated by avrogen, version 1.11.0.0 +// Changes to this file may cause incorrect behavior and will be lost if code +// is regenerated +// +// ------------------------------------------------------------------------------ +namespace org.apache.avro.benchmark.big +{ + using System; + using System.Collections.Generic; + using System.Text; + using Avro; + using Avro.Specific; + + public partial class userInfo : ISpecificRecord + { + public static Schema _SCHEMA = Avro.Schema.Parse(@"{""type"":""record"",""name"":""userInfo"",""namespace"":""org.apache.avro.benchmark.big"",""fields"":[{""name"":""username"",""default"":""NONE"",""type"":""string""},{""name"":""age"",""default"":-1,""type"":""int""},{""name"":""phone"",""default"":""NONE"",""type"":""string""},{""name"":""housenum"",""default"":""NONE"",""type"":""string""},{""name"":""address"",""default"":{},""type"":{""type"":""record"",""name"":""mailing_address"",""namespace"":""org.apache.avro.benchmark.big"",""fields"":[{""name"":""street"",""default"":""NONE"",""type"":""string""},{""name"":""city"",""default"":""NONE"",""type"":""string""},{""name"":""state_prov"",""default"":""NONE"",""type"":""string""},{""name"":""country"",""default"":""NONE"",""type"":""string""},{""name"":""zip"",""default"":""NONE"",""type"":""string""}]}}]}"); + private string _username; + private int _age; + private string _phone; + private string _housenum; + private org.apache.avro.benchmark.big.mailing_address _address; + public virtual Schema Schema + { + get + { + return userInfo._SCHEMA; + } + } + public string username + { + get + { + return this._username; + } + set + { + this._username = value; + } + } + public int age + { + get + { + return this._age; + } + set + { + this._age = value; + } + } + public string phone + { + get + { + return this._phone; + } + set + { + this._phone = value; + } + } + public string housenum + { + get + { + return this._housenum; + } + set + { + this._housenum = value; + } + } + public org.apache.avro.benchmark.big.mailing_address address + { + get + { + return this._address; + } + set + { + this._address = value; + } + } + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.username; + case 1: return this.age; + case 2: return this.phone; + case 3: return this.housenum; + case 4: return this.address; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + }; + } + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: this.username = (System.String)fieldValue; break; + case 1: this.age = (System.Int32)fieldValue; break; + case 2: this.phone = (System.String)fieldValue; break; + case 3: this.housenum = (System.String)fieldValue; break; + case 4: this.address = (org.apache.avro.benchmark.big.mailing_address)fieldValue; break; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + }; + } + } +} diff --git a/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/small/test.cs b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/small/test.cs new file mode 100644 index 00000000000..b0553d1fe16 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/small/test.cs @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// ------------------------------------------------------------------------------ +// +// Generated by avrogen, version 1.11.0.0 +// Changes to this file may cause incorrect behavior and will be lost if code +// is regenerated +// +// ------------------------------------------------------------------------------ +namespace org.apache.avro.benchmark.small +{ + using System; + using System.Collections.Generic; + using System.Text; + using Avro; + using Avro.Specific; + + public partial class test : ISpecificRecord + { + public static Schema _SCHEMA = Avro.Schema.Parse("{\"type\":\"record\",\"name\":\"test\",\"namespace\":\"org.apache.avro.benchmark.small\",\"fie" + + "lds\":[{\"name\":\"field\",\"type\":\"string\"}]}"); + private string _field; + public virtual Schema Schema + { + get + { + return test._SCHEMA; + } + } + public string field + { + get + { + return this._field; + } + set + { + this._field = value; + } + } + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.field; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + }; + } + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: this.field = (System.String)fieldValue; break; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + }; + } + } +} diff --git a/lang/csharp/src/apache/benchmark/schema/big.avsc b/lang/csharp/src/apache/benchmark/schema/big.avsc new file mode 100644 index 00000000000..d9075306a72 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/schema/big.avsc @@ -0,0 +1,62 @@ +{ + "namespace": "org.apache.avro.benchmark.big", + "type": "record", + "name": "userInfo", + "fields": [ + { + "default": "NONE", + "type": "string", + "name": "username" + }, + { + "default": -1, + "type": "int", + "name": "age" + }, + { + "default": "NONE", + "type": "string", + "name": "phone" + }, + { + "default": "NONE", + "type": "string", + "name": "housenum" + }, + { + "default": {}, + "type": { + "fields": [ + { + "default": "NONE", + "type": "string", + "name": "street" + }, + { + "default": "NONE", + "type": "string", + "name": "city" + }, + { + "default": "NONE", + "type": "string", + "name": "state_prov" + }, + { + "default": "NONE", + "type": "string", + "name": "country" + }, + { + "default": "NONE", + "type": "string", + "name": "zip" + } + ], + "type": "record", + "name": "mailing_address" + }, + "name": "address" + } + ] +} \ No newline at end of file diff --git a/lang/csharp/src/apache/benchmark/schema/small.avsc b/lang/csharp/src/apache/benchmark/schema/small.avsc new file mode 100644 index 00000000000..ee320705c48 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/schema/small.avsc @@ -0,0 +1,13 @@ +{ + "namespace": "org.apache.avro.benchmark.small", + "type": "record", + "name": "test", + "fields": [ + { + "type": { + "type": "string" + }, + "name": "field" + } + ] +} \ No newline at end of file diff --git a/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/Avro.File.BZip2.Test.csproj b/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/Avro.File.BZip2.Test.csproj new file mode 100644 index 00000000000..ace1db23c2c --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/Avro.File.BZip2.Test.csproj @@ -0,0 +1,42 @@ + + + + + + + $(DefaultUnitTestTargetFrameworks) + false + + + + true + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/BZip2Tests.cs b/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/BZip2Tests.cs new file mode 100644 index 00000000000..821cb4b7233 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/BZip2Tests.cs @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.IO; +using System.Linq; +using NUnit.Framework; + +namespace Avro.File.BZip2.Test +{ + public class Tests + { + private static readonly int[] _testLengths = new int[] { 0, 1000, 64 * 1024, 100000 }; + + [Test, Combinatorial] + public void CompressDecompress([ValueSource(nameof(_testLengths))] int length, [Values] BZip2Level level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + BZip2Codec codec = new BZip2Codec(level); + + byte[] compressed = codec.Compress(data); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + + [Test, Combinatorial] + public void CompressDecompressStream([ValueSource(nameof(_testLengths))] int length, [Values] BZip2Level level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + BZip2Codec codec = new BZip2Codec(level); + + using (MemoryStream inputStream = new MemoryStream(data)) + using (MemoryStream outputStream = new MemoryStream()) + { + codec.Compress(inputStream, outputStream); + + byte[] compressed = outputStream.ToArray(); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + } + + [Test] + public void ToStringAndName([Values] BZip2Level level) + { + BZip2Codec codec = new BZip2Codec(level); + + Assert.AreEqual("bzip2", codec.GetName()); + Assert.AreEqual($"bzip2-{(int)level}", codec.ToString()); + } + + [Test] + public void DefaultLevel() + { + BZip2Codec codec = new BZip2Codec(); + + Assert.AreEqual(BZip2Level.Default, codec.Level); + } + + [Test] + public void Equal([Values] BZip2Level level) + { + BZip2Codec codec1 = new BZip2Codec(level); + BZip2Codec codec2 = new BZip2Codec(level); + + Assert.IsTrue(codec1.Equals(codec1)); + Assert.IsTrue(codec2.Equals(codec2)); + Assert.IsTrue(codec1.Equals(codec2)); + Assert.IsTrue(codec2.Equals(codec1)); + } + + [Test] + public void HashCode([Values] BZip2Level level) + { + BZip2Codec codec = new BZip2Codec(level); + + Assert.AreNotEqual(0, codec.GetHashCode()); + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.BZip2/Avro.File.BZip2.csproj b/lang/csharp/src/apache/codec/Avro.File.BZip2/Avro.File.BZip2.csproj new file mode 100644 index 00000000000..8dac7c9f1ed --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.BZip2/Avro.File.BZip2.csproj @@ -0,0 +1,47 @@ + + + + + + + $(DefaultLibraryTargetFrameworks) + Avro.File.BZip2 + true + ../../../../Avro.snk + + + + + Apache.Avro.File.BZip2 + BZip2 compression library for Apache.Avro + + + + true + + + + + + + + + + + + diff --git a/lang/csharp/src/apache/codec/Avro.File.BZip2/BZip2.cs b/lang/csharp/src/apache/codec/Avro.File.BZip2/BZip2.cs new file mode 100644 index 00000000000..354c39cc89e --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.BZip2/BZip2.cs @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.IO; + +namespace Avro.File.BZip2 +{ + ///

+ /// BZip2 Compression level + /// + public enum BZip2Level + { + Default = 9, + Level1 = 1, + Level2 = 2, + Level3 = 3, + Level4 = 4, + Level5 = 5, + Level6 = 6, + Level7 = 7, + Level8 = 8, + Level9 = 9 + } + + /// + /// Implements BZip2 compression and decompression. + /// + public class BZip2Codec : Codec + { + public BZip2Level Level {get; private set;} + + public BZip2Codec() + : this(BZip2Level.Default) + { + } + + public BZip2Codec(BZip2Level level) + { + Level = level; + } + + /// + public override byte[] Compress(byte[] uncompressedData) + { + using (MemoryStream inputStream = new MemoryStream(uncompressedData)) + using (MemoryStream outputStream = new MemoryStream()) + { + Compress(inputStream, outputStream); + return outputStream.ToArray(); + } + } + + /// + public override void Compress(MemoryStream inputStream, MemoryStream outputStream) + { + inputStream.Position = 0; + outputStream.SetLength(0); + ICSharpCode.SharpZipLib.BZip2.BZip2.Compress(inputStream, outputStream, false, (int)Level); + } + + /// + public override byte[] Decompress(byte[] compressedData, int blockLength) + { + using (MemoryStream inputStream = new MemoryStream(compressedData, 0, blockLength)) + using (MemoryStream outputStream = new MemoryStream()) + { + ICSharpCode.SharpZipLib.BZip2.BZip2.Decompress(inputStream, outputStream, false); + return outputStream.ToArray(); + } + } + + /// + public override string GetName() + { + return DataFileConstants.BZip2Codec; + } + + /// + public override bool Equals(object other) + { + return this == other || GetType().Name == other.GetType().Name; + } + + /// + public override int GetHashCode() + { + return GetName().GetHashCode(); + } + + /// + public override string ToString() + { + return $"{GetName()}-{(int)Level}"; + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/Avro.File.Snappy.Test.csproj b/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/Avro.File.Snappy.Test.csproj new file mode 100644 index 00000000000..ab325b0fa64 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/Avro.File.Snappy.Test.csproj @@ -0,0 +1,42 @@ + + + + + + + $(DefaultUnitTestTargetFrameworks) + false + + + + true + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/SnappyTests.cs b/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/SnappyTests.cs new file mode 100644 index 00000000000..148d493aae2 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/SnappyTests.cs @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.IO; +using System.Linq; +using NUnit.Framework; + +namespace Avro.File.Snappy.Test +{ + public class Tests + { + private static readonly int[] _testLengths = new int[] { 0, 1000, 64 * 1024, 100000 }; + + [TestCaseSource(nameof(_testLengths))] + public void CompressDecompress(int length) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + SnappyCodec codec = new SnappyCodec(); + + byte[] compressed = codec.Compress(data); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + + [TestCaseSource(nameof(_testLengths))] + public void CompressDecompressStream(int length) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + SnappyCodec codec = new SnappyCodec(); + + using (MemoryStream inputStream = new MemoryStream(data)) + using (MemoryStream outputStream = new MemoryStream()) + { + codec.Compress(inputStream, outputStream); + + byte[] compressed = outputStream.ToArray(); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + } + + [Test] + public void ToStringAndName() + { + SnappyCodec codec = new SnappyCodec(); + + Assert.AreEqual("snappy", codec.GetName()); + Assert.AreEqual("snappy", codec.ToString()); + } + + [Test] + public void Equal() + { + SnappyCodec codec1 = new SnappyCodec(); + SnappyCodec codec2 = new SnappyCodec(); + + Assert.IsTrue(codec1.Equals(codec1)); + Assert.IsTrue(codec2.Equals(codec2)); + Assert.IsTrue(codec1.Equals(codec2)); + Assert.IsTrue(codec2.Equals(codec1)); + } + + [Test] + public void HashCode() + { + SnappyCodec codec = new SnappyCodec(); + + Assert.AreNotEqual(0, codec.GetHashCode()); + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.Snappy/Avro.File.Snappy.csproj b/lang/csharp/src/apache/codec/Avro.File.Snappy/Avro.File.Snappy.csproj new file mode 100644 index 00000000000..71bc7968827 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Snappy/Avro.File.Snappy.csproj @@ -0,0 +1,47 @@ + + + + + + + $(DefaultLibraryTargetFrameworks) + Avro.File.Snappy + true + ../../../../Avro.snk + + + + + Apache.Avro.File.Snappy + Snappy compression library for Apache.Avro + + + + true + + + + + + + + + + + + diff --git a/lang/csharp/src/apache/codec/Avro.File.Snappy/Crc32.cs b/lang/csharp/src/apache/codec/Avro.File.Snappy/Crc32.cs new file mode 100644 index 00000000000..1b832e11e43 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Snappy/Crc32.cs @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; + +namespace Avro.File.Snappy +{ + /// + /// Implements a 32-bit CRC hash algorithm. + /// + internal static class Crc32 + { + private const uint DefaultPolynomial = 0xedb88320u; + private const uint DefaultSeed = 0xffffffffu; + + private static uint[] defaultTable; + + public static uint Compute(byte[] buffer) + { + return Compute(DefaultPolynomial, DefaultSeed, buffer); + } + + public static uint Compute(uint polynomial, uint seed, ReadOnlySpan buffer) + { + return ~CalculateHash(InitializeTable(polynomial), seed, buffer); + } + + private static uint[] InitializeTable(uint polynomial) + { + if (polynomial == DefaultPolynomial && defaultTable != null) + return defaultTable; + + uint[] createTable = new uint[256]; + for (int i = 0; i < 256; i++) + { + uint entry = (uint)i; + for (int j = 0; j < 8; j++) + if ((entry & 1) == 1) + entry = (entry >> 1) ^ polynomial; + else + entry >>= 1; + createTable[i] = entry; + } + + if (polynomial == DefaultPolynomial) + defaultTable = createTable; + + return createTable; + } + + private static uint CalculateHash(uint[] table, uint seed, ReadOnlySpan buffer) + { + uint hash = seed; + for (int i = 0; i < buffer.Length; i++) + hash = (hash >> 8) ^ table[buffer[i] ^ hash & 0xff]; + return hash; + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.Snappy/Snappy.cs b/lang/csharp/src/apache/codec/Avro.File.Snappy/Snappy.cs new file mode 100644 index 00000000000..a73f8209c0d --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Snappy/Snappy.cs @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.IO; + +namespace Avro.File.Snappy +{ + /// + /// Implements Snappy compression and decompression. + /// + public class SnappyCodec : Codec + { + /// + /// + public override byte[] Compress(byte[] uncompressedData) + { + using (MemoryStream outputStream = new MemoryStream()) + { + byte[] compressedData = IronSnappy.Snappy.Encode(uncompressedData); + outputStream.Write(compressedData, 0, compressedData.Length); + + var crc = ByteSwap(Crc32.Compute(uncompressedData)); + outputStream.Write(BitConverter.GetBytes(crc), 0, 4); + + return outputStream.ToArray(); + } + } + + /// + public override void Compress(MemoryStream inputStream, MemoryStream outputStream) + { + inputStream.Position = 0; + + byte[] uncompressedData = inputStream.ToArray(); + byte[] compressedData = IronSnappy.Snappy.Encode(uncompressedData); + + outputStream.SetLength(0); + + outputStream.Write(compressedData, 0, compressedData.Length); + + var crc = ByteSwap(Crc32.Compute(uncompressedData)); + outputStream.Write(BitConverter.GetBytes(crc), 0, 4); + } + + /// + public override byte[] Decompress(byte[] compressedData, int blockLength) + { + byte[] uncompressedData = IronSnappy.Snappy.Decode(compressedData.AsSpan(0, blockLength - 4)); + + return ByteSwap(Crc32.Compute(uncompressedData)) == BitConverter.ToUInt32(compressedData, blockLength - 4) ? + uncompressedData : + throw new IOException("Checksum failure"); + } + + private static uint ByteSwap(uint word) + { + return ((word >> 24) & 0x000000FF) | ((word >> 8) & 0x0000FF00) | ((word << 8) & 0x00FF0000) | ((word << 24) & 0xFF000000); + } + + /// + public override string GetName() + { + return DataFileConstants.SnappyCodec; + } + + /// + public override bool Equals(object other) + { + return this == other || GetType().Name == other.GetType().Name; + } + + /// + public override int GetHashCode() + { + return GetName().GetHashCode(); + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.XZ.Test/Avro.File.XZ.Test.csproj b/lang/csharp/src/apache/codec/Avro.File.XZ.Test/Avro.File.XZ.Test.csproj new file mode 100644 index 00000000000..354c6a51c26 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.XZ.Test/Avro.File.XZ.Test.csproj @@ -0,0 +1,42 @@ + + + + + + + $(DefaultUnitTestTargetFrameworks) + false + + + + true + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lang/csharp/src/apache/codec/Avro.File.XZ.Test/XZTests.cs b/lang/csharp/src/apache/codec/Avro.File.XZ.Test/XZTests.cs new file mode 100644 index 00000000000..27f38dc2bf2 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.XZ.Test/XZTests.cs @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.IO; +using System.Linq; +using NUnit.Framework; + +namespace Avro.File.XZ.Test +{ + public class Tests + { + private static readonly int[] _testLengths = new int[] { 0, 1000, 64 * 1024, 100000 }; + + [Test, Combinatorial] + public void CompressDecompress([ValueSource(nameof(_testLengths))] int length, [Values] XZLevel level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + XZCodec codec = new XZCodec(level); + + byte[] compressed = codec.Compress(data); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + + [Test, Combinatorial] + public void CompressDecompressStream([ValueSource(nameof(_testLengths))] int length, [Values] XZLevel level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + XZCodec codec = new XZCodec(level); + + using (MemoryStream inputStream = new MemoryStream(data)) + using (MemoryStream outputStream = new MemoryStream()) + { + codec.Compress(inputStream, outputStream); + + byte[] compressed = outputStream.ToArray(); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + } + + [Test] + public void ToStringAndName([Values] XZLevel level) + { + XZCodec codec = new XZCodec(level); + + Assert.AreEqual("xz", codec.GetName()); + Assert.AreEqual($"xz-{(int)level}", codec.ToString()); + } + + [Test] + public void DefaultLevel() + { + XZCodec codec = new XZCodec(); + + Assert.AreEqual(XZLevel.Default, codec.Level); + } + + [Test] + public void Equal([Values] XZLevel level) + { + XZCodec codec1 = new XZCodec(level); + XZCodec codec2 = new XZCodec(level); + + Assert.IsTrue(codec1.Equals(codec1)); + Assert.IsTrue(codec2.Equals(codec2)); + Assert.IsTrue(codec1.Equals(codec2)); + Assert.IsTrue(codec2.Equals(codec1)); + } + + [Test] + public void HashCode([Values] XZLevel level) + { + XZCodec codec = new XZCodec(level); + + Assert.AreNotEqual(0, codec.GetHashCode()); + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.XZ/Avro.File.XZ.csproj b/lang/csharp/src/apache/codec/Avro.File.XZ/Avro.File.XZ.csproj new file mode 100644 index 00000000000..034bb99dcce --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.XZ/Avro.File.XZ.csproj @@ -0,0 +1,48 @@ + + + + + + + $(DefaultLibraryTargetFrameworks) + Avro.File.XZ + true + ../../../../Avro.snk + CS8002 + + + + + Apache.Avro.File.XZ + XZ compression library for Apache.Avro + + + + true + + + + + + + + + + + + diff --git a/lang/csharp/src/apache/codec/Avro.File.XZ/XZ.cs b/lang/csharp/src/apache/codec/Avro.File.XZ/XZ.cs new file mode 100644 index 00000000000..84d3742f6cc --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.XZ/XZ.cs @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.Collections.Generic; +using System.IO; +using System.Runtime.InteropServices; +using Joveler.Compression.XZ; + +namespace Avro.File.XZ +{ + /// + /// XZ Compression level + /// + public enum XZLevel + { + Level0 = 0, + Level1 = 1, + Level2 = 2, + Level3 = 3, + Level4 = 4, + Level5 = 5, + Level6 = 6, + Level7 = 7, + Level8 = 8, + Level9 = 9, + Default = Level6, + Minimum = Level0, + Maximum = Level9 + } + + /// + /// Implements XZ compression and decompression. + /// + public class XZCodec : Codec + { + public XZLevel Level {get; private set;} + public bool Extreme {get; private set;} + public int Threads {get; private set;} + + public XZCodec() + : this(XZLevel.Default) + { + } + + public XZCodec(XZLevel level) + : this(level, false) + { + } + + public XZCodec(XZLevel level, bool extreme) + : this(level, extreme, 0) + { + } + + public XZCodec(XZLevel level, bool extreme, int numOfThreads) + { + Level = level; + Extreme = extreme; + Threads = numOfThreads; + } + + static XZCodec() + { + Initialize(); // One time initialization + } + + private static void Initialize() + { + string arch = RuntimeInformation.OSArchitecture.ToString().ToLower(); + string foundLibPath = string.Empty; + string libPath; + string rid; + string libName; + + // Determine Platform (needed for proper Runtime ID) + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + rid = $"win-{arch}"; + libName = "liblzma.dll"; + } + else + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + { + rid = $"linux-{arch}"; + libName = "liblzma.so"; + } + else + if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + rid = $"osx-{arch}"; + libName = "liblzma.dylib"; + } + else + { + // Unknown platform + throw new PlatformNotSupportedException("Unknown runtime platform!"); + } + + // Try to search for the lib in the working directory and the application binary directory + foreach (var relPath in new List { ".", AppDomain.CurrentDomain.BaseDirectory }) + { + // Try first the lib name directly + libPath = Path.Combine(relPath, libName); + if (System.IO.File.Exists(libPath)) + { + foundLibPath = libPath; + break; + } + + // Try the runtimes/RID/native location + // This is the default location for netstandard native libs + libPath = Path.Combine(relPath, "runtimes", rid, "native", libName); + if (System.IO.File.Exists(libPath)) + { + foundLibPath = libPath; + break; + } + } + + // Try the OS search path if nothing is found yet + if (string.IsNullOrEmpty(foundLibPath)) + { + var values = Environment.GetEnvironmentVariable("PATH"); + foreach (string path in values.Split(Path.PathSeparator)) + { + libPath = Path.Combine(path, libName); + if (System.IO.File.Exists(libPath)) + { + foundLibPath = libPath; + break; + } + } + } + + if (string.IsNullOrEmpty(foundLibPath)) + throw new PlatformNotSupportedException($"Unable to find {libName}"); + + // Initialize XZ library + XZInit.GlobalInit(foundLibPath); + } + + public static void Uninitialize() + { + XZInit.GlobalCleanup(); + } + + /// + public override byte[] Compress(byte[] uncompressedData) + { + using (MemoryStream inputStream = new MemoryStream(uncompressedData)) + using (MemoryStream outputStream = new MemoryStream()) + { + Compress(inputStream, outputStream); + return outputStream.ToArray(); + } + } + + /// + public override void Compress(MemoryStream inputStream, MemoryStream outputStream) + { + XZCompressOptions compOpts = new XZCompressOptions + { + Level = (LzmaCompLevel)(int)Level, + ExtremeFlag = Extreme, + LeaveOpen = true + }; + + XZThreadedCompressOptions threadOpts = new XZThreadedCompressOptions + { + Threads = Threads, + }; + + inputStream.Position = 0; + outputStream.SetLength(0); + + using (XZStream xzStream = new XZStream(outputStream, compOpts, threadOpts)) + { + inputStream.CopyTo(xzStream); + xzStream.Flush(); + } + } + + /// + public override byte[] Decompress(byte[] compressedData, int blockLength) + { + XZDecompressOptions decompOpts = new XZDecompressOptions(); + + using (MemoryStream inputStream = new MemoryStream(compressedData, 0, blockLength)) + using (MemoryStream outputStream = new MemoryStream()) + using (XZStream xzStream = new XZStream(inputStream, decompOpts)) + { + xzStream.CopyTo(outputStream); + xzStream.Flush(); + return outputStream.ToArray(); + } + } + + /// + public override string GetName() + { + return DataFileConstants.XZCodec; + } + + /// + public override bool Equals(object other) + { + return this == other || GetType().Name == other.GetType().Name; + } + + /// + public override int GetHashCode() + { + return GetName().GetHashCode(); + } + + /// + public override string ToString() + { + return $"{GetName()}-{(int)Level}"; + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/Avro.File.Zstandard.Test.csproj b/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/Avro.File.Zstandard.Test.csproj new file mode 100644 index 00000000000..651fabded68 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/Avro.File.Zstandard.Test.csproj @@ -0,0 +1,42 @@ + + + + + + + $(DefaultUnitTestTargetFrameworks) + false + + + + true + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/ZstandardTests.cs b/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/ZstandardTests.cs new file mode 100644 index 00000000000..e360ee576c8 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/ZstandardTests.cs @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.IO; +using System.Linq; +using NUnit.Framework; + +namespace Avro.File.Zstandard.Test +{ + public class Tests + { + private static readonly int[] _testLengths = new int[] { 0, 1000, 64 * 1024, 100000 }; + + [Test, Combinatorial] + public void CompressDecompress([ValueSource(nameof(_testLengths))] int length, [Values] ZstandardLevel level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + ZstandardCodec codec = new ZstandardCodec(level); + + byte[] compressed = codec.Compress(data); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + + [Test, Combinatorial] + public void CompressDecompressStream([ValueSource(nameof(_testLengths))] int length, [Values] ZstandardLevel level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + ZstandardCodec codec = new ZstandardCodec(level); + + using (MemoryStream inputStream = new MemoryStream(data)) + using (MemoryStream outputStream = new MemoryStream()) + { + codec.Compress(inputStream, outputStream); + + byte[] compressed = outputStream.ToArray(); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + } + + [Test] + public void ToStringAndName([Values] ZstandardLevel level) + { + ZstandardCodec codec = new ZstandardCodec(level); + + Assert.AreEqual("zstandard", codec.GetName()); + Assert.AreEqual($"zstandard[{(int)level}]", codec.ToString()); + } + + [Test] + public void DefaultLevel() + { + ZstandardCodec codec = new ZstandardCodec(); + + Assert.AreEqual(ZstandardLevel.Default, codec.Level); + } + + [Test] + public void Equal([Values] ZstandardLevel level) + { + ZstandardCodec codec1 = new ZstandardCodec(level); + ZstandardCodec codec2 = new ZstandardCodec(level); + + Assert.IsTrue(codec1.Equals(codec1)); + Assert.IsTrue(codec2.Equals(codec2)); + Assert.IsTrue(codec1.Equals(codec2)); + Assert.IsTrue(codec2.Equals(codec1)); + } + + [Test] + public void HashCode([Values] ZstandardLevel level) + { + ZstandardCodec codec = new ZstandardCodec(level); + + Assert.AreNotEqual(0, codec.GetHashCode()); + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.Zstandard/Avro.File.Zstandard.csproj b/lang/csharp/src/apache/codec/Avro.File.Zstandard/Avro.File.Zstandard.csproj new file mode 100644 index 00000000000..17f9f9f00f5 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Zstandard/Avro.File.Zstandard.csproj @@ -0,0 +1,48 @@ + + + + + + + $(DefaultLibraryTargetFrameworks) + Avro.File.Zstandard + true + ../../../../Avro.snk + CS8002 + + + + + Apache.Avro.File.Zstandard + Zstandard compression library for Apache.Avro + + + + true + + + + + + + + + + + + diff --git a/lang/csharp/src/apache/codec/Avro.File.Zstandard/Zstandard.cs b/lang/csharp/src/apache/codec/Avro.File.Zstandard/Zstandard.cs new file mode 100644 index 00000000000..5adfb441799 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Zstandard/Zstandard.cs @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.IO; +using System.IO.Compression; +using Zstandard.Net; + +namespace Avro.File.Zstandard +{ + /// + /// Zstandard Compression level + /// + public enum ZstandardLevel + { + Level1 = 1, + Level2 = 2, + Level3 = 3, + Level4 = 4, + Level5 = 5, + Level6 = 6, + Level7 = 7, + Level8 = 8, + Level9 = 9, + Level10 = 10, + Level11 = 11, + Level12 = 12, + Level13 = 13, + Level14 = 14, + Level15 = 15, + Level16 = 16, + Level17 = 17, + Level18 = 18, + Level19 = 19, + Default = Level3, + Minimum = Level1, + Maximum = Level19 + } + + /// + /// Implements Zstandard compression and decompression. + /// + public class ZstandardCodec : Codec + { + public ZstandardLevel Level {get; private set;} + + public ZstandardCodec() + : this(ZstandardLevel.Default) + { + } + + public ZstandardCodec(ZstandardLevel level) + { + Level = level; + } + + /// + public override byte[] Compress(byte[] uncompressedData) + { + using (var outputStream = new MemoryStream()) + using (var compressionStream = new ZstandardStream(outputStream, CompressionMode.Compress)) + { + compressionStream.CompressionLevel = (int)Level; + compressionStream.Write(uncompressedData, 0, uncompressedData.Length); + compressionStream.Flush(); + return outputStream.ToArray(); + } + } + + /// + public override void Compress(MemoryStream inputStream, MemoryStream outputStream) + { + inputStream.Position = 0; + outputStream.SetLength(0); + + using (var compressionStream = new ZstandardStream(outputStream, CompressionMode.Compress, true)) + { + compressionStream.CompressionLevel = (int)Level; + inputStream.CopyTo(compressionStream); + compressionStream.Flush(); + } + } + + /// + public override byte[] Decompress(byte[] compressedData, int blockLength) + { + using (var memoryStream = new MemoryStream(compressedData, 0, blockLength)) + using (var outputStream = new MemoryStream()) + using (var compressionStream = new ZstandardStream(memoryStream, CompressionMode.Decompress)) + { + compressionStream.CopyTo(outputStream); + compressionStream.Flush(); + return outputStream.ToArray(); + } + } + + /// + public override string GetName() + { + return DataFileConstants.ZstandardCodec; + } + + /// + public override bool Equals(object other) + { + return this == other || GetType().Name == other.GetType().Name; + } + + /// + public override int GetHashCode() + { + return GetName().GetHashCode(); + } + + /// + public override string ToString() + { + return $"{GetName()}[{(int)Level}]"; + } + } +} diff --git a/lang/csharp/src/apache/codegen/Avro.codegen.csproj b/lang/csharp/src/apache/codegen/Avro.codegen.csproj index 055a20f781f..94aa8123119 100644 --- a/lang/csharp/src/apache/codegen/Avro.codegen.csproj +++ b/lang/csharp/src/apache/codegen/Avro.codegen.csproj @@ -17,20 +17,18 @@ - Exe - netcoreapp2.1;netcoreapp3.1;net5.0 + $(DefaultExeTargetFrameworks) avrogen Avro.codegen - false true ..\..\..\Avro.snk @@ -51,20 +49,19 @@ + + + Major + + true - - - - - - - - - diff --git a/lang/csharp/src/apache/codegen/AvroGen.cs b/lang/csharp/src/apache/codegen/AvroGen.cs index 5f4ffd24db8..3b07ca59fdc 100644 --- a/lang/csharp/src/apache/codegen/AvroGen.cs +++ b/lang/csharp/src/apache/codegen/AvroGen.cs @@ -1,4 +1,4 @@ -īģŋ/** +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,25 +17,43 @@ */ using System; using System.Collections.Generic; -using System.Text; +using System.Linq; +using System.Reflection; namespace Avro { - class AvroGen + public class AvroGenTool { - static int Main(string[] args) + public static int Main(string[] args) { - // Print usage if no arguments provided or help requested - if (args.Length == 0 || args[0] == "-h" || args[0] == "--help") + // Print usage if no arguments provided + if (args.Length == 0) { Usage(); return 1; } + // Print usage if help requested + if (args.Contains("-h") || args.Contains("--help")) + { + Usage(); + return 0; + } + + if (args.Contains("--version") || args.Contains("-V")) + { + // Print version information + // Note: Use InformationalVersion attribute + // It is capable to include semver prerelease information label (if prerelease), e.g. 1.x.y-beta.z + Console.WriteLine(typeof(AvroGenTool).Assembly.GetCustomAttribute().InformationalVersion); + return 0; + } + // Parse command line arguments bool? isProtocol = null; string inputFile = null; string outputDir = null; + bool skipDirectoriesCreation = false; var namespaceMapping = new Dictionary(); for (int i = 0; i < args.Length; ++i) { @@ -82,6 +100,10 @@ static int Main(string[] args) namespaceMapping[parts[0]] = parts[1]; } + else if (args[i] == "--skip-directories") + { + skipDirectoriesCreation = true; + } else if (outputDir == null) { outputDir = args[i]; @@ -116,7 +138,7 @@ static int Main(string[] args) else if (isProtocol.Value) rc = GenProtocol(inputFile, outputDir, namespaceMapping); else - rc = GenSchema(inputFile, outputDir, namespaceMapping); + rc = GenSchema(inputFile, outputDir, namespaceMapping, skipDirectoriesCreation); return rc; } @@ -128,26 +150,24 @@ static void Usage() " avrogen -p [--namespace ]\n" + " avrogen -s [--namespace ]\n\n" + "Options:\n" + - " -h --help Show this screen.\n" + - " --namespace Map an Avro schema/protocol namespace to a C# namespace.\n" + - " The format is \"my.avro.namespace:my.csharp.namespace\".\n" + - " May be specified multiple times to map multiple namespaces.\n", + " -h --help Show this screen.\n" + + " -V --version Show version.\n" + + " --namespace Map an Avro schema/protocol namespace to a C# namespace.\n" + + " The format is \"my.avro.namespace:my.csharp.namespace\".\n" + + " May be specified multiple times to map multiple namespaces.\n" + + " --skip-directories Skip creation of namespace directories. It will generate classes right inside output directory\n", AppDomain.CurrentDomain.FriendlyName); - return; } - static int GenProtocol(string infile, string outdir, + + public static int GenProtocol(string infile, string outdir, IEnumerable> namespaceMapping) { try { string text = System.IO.File.ReadAllText(infile); - Protocol protocol = Protocol.Parse(text); CodeGen codegen = new CodeGen(); - codegen.AddProtocol(protocol); - - foreach (var entry in namespaceMapping) - codegen.NamespaceMapping[entry.Key] = entry.Value; + codegen.AddProtocol(text, namespaceMapping); codegen.GenerateCode(); codegen.WriteTypes(outdir); @@ -160,22 +180,18 @@ static int GenProtocol(string infile, string outdir, return 0; } - static int GenSchema(string infile, string outdir, - IEnumerable> namespaceMapping) + + public static int GenSchema(string infile, string outdir, + IEnumerable> namespaceMapping, bool skipDirectories) { try { string text = System.IO.File.ReadAllText(infile); - Schema schema = Schema.Parse(text); - CodeGen codegen = new CodeGen(); - codegen.AddSchema(schema); - - foreach (var entry in namespaceMapping) - codegen.NamespaceMapping[entry.Key] = entry.Value; + codegen.AddSchema(text, namespaceMapping); codegen.GenerateCode(); - codegen.WriteTypes(outdir); + codegen.WriteTypes(outdir, skipDirectories); } catch (Exception ex) { diff --git a/lang/csharp/src/apache/ipc.test/Avro.ipc.test.csproj b/lang/csharp/src/apache/ipc.test/Avro.ipc.test.csproj index 47eeba2e5bd..dfa5faa4cfe 100644 --- a/lang/csharp/src/apache/ipc.test/Avro.ipc.test.csproj +++ b/lang/csharp/src/apache/ipc.test/Avro.ipc.test.csproj @@ -16,8 +16,11 @@ --> + + + - net40 + $(DefaultUnitTestTargetFrameworks) Avro.ipc.test Avro.ipc.test false @@ -30,23 +33,10 @@ - - 3.10.1 - - - 3.10.0 - - - 3.9.0 - - - - - - - - - + + + + diff --git a/lang/csharp/src/apache/ipc/Avro.ipc.csproj b/lang/csharp/src/apache/ipc/Avro.ipc.csproj index b72fe9fe589..587a14695b4 100644 --- a/lang/csharp/src/apache/ipc/Avro.ipc.csproj +++ b/lang/csharp/src/apache/ipc/Avro.ipc.csproj @@ -16,8 +16,11 @@ --> + + + - net40;netstandard2.0 + $(DefaultLibraryTargetFrameworks) Avro.ipc Avro.ipc false @@ -27,7 +30,7 @@ - + diff --git a/lang/csharp/src/apache/ipc/HttpListenerServer.cs b/lang/csharp/src/apache/ipc/HttpListenerServer.cs index 9bd03d2975a..21d1759717c 100644 --- a/lang/csharp/src/apache/ipc/HttpListenerServer.cs +++ b/lang/csharp/src/apache/ipc/HttpListenerServer.cs @@ -77,7 +77,7 @@ protected void HttpListenerCallback(IAsyncResult result) // ExceptionHandler(ex, result); //else // Debug.Print("Exception occured while processing a request, no exception handler was provided - ignoring", ex); - Debug.Print("Exception occured while processing a web request, skipping this request: ", ex); + Debug.Print("Exception occurred while processing a web request, skipping this request: ", ex); } } diff --git a/lang/csharp/src/apache/ipc/Responder.cs b/lang/csharp/src/apache/ipc/Responder.cs index 875977462d5..10c40670a05 100644 --- a/lang/csharp/src/apache/ipc/Responder.cs +++ b/lang/csharp/src/apache/ipc/Responder.cs @@ -181,14 +181,14 @@ public IList Respond(IList buffers, WriteResponse(m.Response, response, output); else { - try - { - WriteError(m.SupportedErrors, error, output); - } - catch (Exception) - { - // Presumably no match on the exception, throw the original - throw error; + try + { + WriteError(m.SupportedErrors, error, output); + } + catch (Exception) + { + // Presumably no match on the exception, throw the original + throw error; } } } diff --git a/lang/csharp/src/apache/ipc/Specific/SpecificRequestor.cs b/lang/csharp/src/apache/ipc/Specific/SpecificRequestor.cs index b14ceff4fed..4ef8916d9b5 100644 --- a/lang/csharp/src/apache/ipc/Specific/SpecificRequestor.cs +++ b/lang/csharp/src/apache/ipc/Specific/SpecificRequestor.cs @@ -1,4 +1,4 @@ -īģŋ/** +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -87,8 +87,8 @@ public override object ReadResponse(Schema writer, Schema reader, Decoder decode public override Exception ReadError(Schema writer, Schema reader, Decoder decoder) { - var response = new SpecificReader(writer, reader).Read(null, decoder); - + var response = new SpecificReader(writer, reader).Read(null, decoder); + var error = response as Exception; if(error != null) return error; diff --git a/lang/csharp/src/apache/main/AssemblyInfo.cs b/lang/csharp/src/apache/main/AssemblyInfo.cs new file mode 100644 index 00000000000..53eacc1f9df --- /dev/null +++ b/lang/csharp/src/apache/main/AssemblyInfo.cs @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.Runtime.CompilerServices; + +[assembly: InternalsVisibleTo("Avro.test, PublicKey=00240000048000009400000006020000002400005253413100040000010001001145636d1b96168c2781abfd60478f45d010fe83dd0f318404cbf67252bca8cd827f24648d47ff682f35e60307c05d3cd89f0b063729cf8d2ebe6510b9e7d295dec6707ec91719d859458981f7ca1cbbea79b702b2fb64d1dbf0881887315345b70fa50fcf91b59e6a937c8d23919d409ee2f1f234cc4c8dbf5a29d3d670f3c9")] diff --git a/lang/csharp/src/apache/main/Avro.main.csproj b/lang/csharp/src/apache/main/Avro.main.csproj index 15cfeadde71..beb1dff4d58 100644 --- a/lang/csharp/src/apache/main/Avro.main.csproj +++ b/lang/csharp/src/apache/main/Avro.main.csproj @@ -17,10 +17,9 @@ - - netstandard2.0;netstandard2.1;netcoreapp2.1 + $(DefaultLibraryTargetFrameworks) Avro Avro true @@ -48,28 +47,10 @@ - - - - - - - all - runtime; build; native; contentfiles; analyzers - - - all - runtime; build; native; contentfiles; analyzers; buildtransitive - - - - - - - + diff --git a/lang/csharp/src/apache/main/AvroDecimal.cs b/lang/csharp/src/apache/main/AvroDecimal.cs index 17d11204764..ed748ae35f0 100644 --- a/lang/csharp/src/apache/main/AvroDecimal.cs +++ b/lang/csharp/src/apache/main/AvroDecimal.cs @@ -25,12 +25,10 @@ namespace Avro /// /// Represents a big decimal. /// - #pragma warning disable CS1591 // Missing XML comment for publicly visible type or member - #pragma warning disable CA2225 // Operator overloads have named alternates public struct AvroDecimal : IConvertible, IFormattable, IComparable, IComparable, IEquatable { /// - /// Initializes a new instance of the class from a given double. + /// Initializes a new instance of the struct from a given double. /// /// The double value. public AvroDecimal(double value) @@ -39,7 +37,7 @@ public AvroDecimal(double value) } /// - /// Initializes a new instance of the class from a given float. + /// Initializes a new instance of the struct from a given float. /// /// The float value. public AvroDecimal(float value) @@ -48,56 +46,69 @@ public AvroDecimal(float value) } /// - /// Initializes a new instance of the class from a given decimal. + /// Initializes a new instance of the struct from a given decimal. /// /// The decimal value. public AvroDecimal(decimal value) { var bytes = GetBytesFromDecimal(value); - var unscaledValueBytes = new byte[12]; - Array.Copy(bytes, unscaledValueBytes, unscaledValueBytes.Length); + // Copy the first 12 bytes of the decimal into an array of size 13 + // so that the last byte is 0, which is required by the + // BigInteger constructor to ensure the unscaled value is positive. + var unscaledValueBytes = new byte[13]; + Array.Copy(bytes, unscaledValueBytes, 12); var unscaledValue = new BigInteger(unscaledValueBytes); var scale = bytes[14]; if (bytes[15] == 128) + { unscaledValue *= BigInteger.MinusOne; + } UnscaledValue = unscaledValue; Scale = scale; } /// - /// Initializes a new instance of the class from a given int. + /// Initializes a new instance of the struct from a given int. /// /// The int value. public AvroDecimal(int value) - : this(new BigInteger(value), 0) { } + : this(new BigInteger(value), 0) + { + } /// - /// Initializes a new instance of the class from a given long. + /// Initializes a new instance of the struct from a given long. /// /// The long value. public AvroDecimal(long value) - : this(new BigInteger(value), 0) { } + : this(new BigInteger(value), 0) + { + } /// - /// Initializes a new instance of the class from a given unsigned int. + /// Initializes a new instance of the struct from a given unsigned int. /// /// The unsigned int value. public AvroDecimal(uint value) - : this(new BigInteger(value), 0) { } + : this(new BigInteger(value), 0) + { + } /// - /// Initializes a new instance of the class from a given unsigned long. + /// Initializes a new instance of the struct from a given unsigned long. /// /// The unsigned long value. public AvroDecimal(ulong value) - : this(new BigInteger(value), 0) { } + : this(new BigInteger(value), 0) + { + } /// - /// Initializes a new instance of the class from a given + /// Initializes a new instance of the struct from a given /// and a scale. /// /// The double value. @@ -109,302 +120,567 @@ public AvroDecimal(BigInteger unscaledValue, int scale) } /// - /// Gets the unscaled integer value represented by the current . + /// Gets the unscaled integer value represented by the current . /// + /// + /// The unscaled value. + /// public BigInteger UnscaledValue { get; } /// - /// Gets the scale of the current . + /// Gets the scale of the current . /// + /// + /// The scale. + /// public int Scale { get; } /// - /// Gets the sign of the current . + /// Gets the sign of the current . /// + /// + /// The sign. + /// internal int Sign { get { return UnscaledValue.Sign; } } /// - /// Converts the current to a string. + /// Converts the current to a string. /// - /// A string representation of the numeric value. + /// + /// A string representation of the numeric value. + /// public override string ToString() { var number = UnscaledValue.ToString($"D{Scale + 1}", CultureInfo.CurrentCulture); if (Scale > 0) + { return number.Insert(number.Length - Scale, CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator); + } return number; } + /// + /// Implements the operator ==. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator ==(AvroDecimal left, AvroDecimal right) { return left.Equals(right); } + /// + /// Implements the operator !=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator !=(AvroDecimal left, AvroDecimal right) { return !left.Equals(right); } + /// + /// Implements the operator >. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >(AvroDecimal left, AvroDecimal right) { return left.CompareTo(right) > 0; } + /// + /// Implements the operator >=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >=(AvroDecimal left, AvroDecimal right) { return left.CompareTo(right) >= 0; } + /// + /// Implements the operator <. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <(AvroDecimal left, AvroDecimal right) { return left.CompareTo(right) < 0; } + /// + /// Implements the operator <=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <=(AvroDecimal left, AvroDecimal right) { return left.CompareTo(right) <= 0; } + /// + /// Implements the operator ==. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator ==(AvroDecimal left, decimal right) { return left.Equals(right); } + /// + /// Implements the operator !=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator !=(AvroDecimal left, decimal right) { return !left.Equals(right); } + /// + /// Implements the operator >. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >(AvroDecimal left, decimal right) { return left.CompareTo(right) > 0; } + /// + /// Implements the operator >=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >=(AvroDecimal left, decimal right) { return left.CompareTo(right) >= 0; } + /// + /// Implements the operator <. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <(AvroDecimal left, decimal right) { return left.CompareTo(right) < 0; } + /// + /// Implements the operator <=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <=(AvroDecimal left, decimal right) { return left.CompareTo(right) <= 0; } + /// + /// Implements the operator ==. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator ==(decimal left, AvroDecimal right) { return left.Equals(right); } + /// + /// Implements the operator !=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator !=(decimal left, AvroDecimal right) { return !left.Equals(right); } + /// + /// Implements the operator >. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >(decimal left, AvroDecimal right) { return left.CompareTo(right) > 0; } + /// + /// Implements the operator >=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >=(decimal left, AvroDecimal right) { return left.CompareTo(right) >= 0; } + /// + /// Implements the operator <. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <(decimal left, AvroDecimal right) { return left.CompareTo(right) < 0; } + /// + /// Implements the operator <=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <=(decimal left, AvroDecimal right) { return left.CompareTo(right) <= 0; } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator byte(AvroDecimal value) { return ToByte(value); } /// - /// Creates a byte from a given . + /// Creates a from a given . /// - /// The . - /// A byte. + /// The . + /// + /// A . + /// public static byte ToByte(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator sbyte(AvroDecimal value) { return ToSByte(value); } /// - /// Creates a signed byte from a given . + /// Creates a from a given . /// - /// The . - /// A signed byte. + /// The . + /// + /// A . + /// public static sbyte ToSByte(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator short(AvroDecimal value) { return ToInt16(value); } /// - /// Creates a short from a given . + /// Creates a short from a given . /// - /// The . - /// A short. + /// The . + /// + /// A . + /// public static short ToInt16(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// An . + /// public static explicit operator int(AvroDecimal value) { return ToInt32(value); } /// - /// Creates an int from a given . + /// Creates an int from a given . /// - /// The . - /// An int. + /// The . + /// + /// An . + /// public static int ToInt32(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator long(AvroDecimal value) { return ToInt64(value); } /// - /// Creates a long from a given . + /// Creates a from a given . /// - /// The . - /// A long. + /// The . + /// + /// A . + /// public static long ToInt64(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// An . + /// public static explicit operator ushort(AvroDecimal value) { return ToUInt16(value); } /// - /// Creates an unsigned short from a given . + /// Creates an from a given . /// - /// The . - /// An unsigned short. + /// The . + /// + /// An . + /// public static ushort ToUInt16(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// An . + /// public static explicit operator uint(AvroDecimal value) { return ToUInt32(value); } /// - /// Creates an unsigned int from a given . + /// Creates an from a given . /// - /// The . - /// An unsigned int. + /// The . + /// + /// An . + /// public static uint ToUInt32(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// An . + /// public static explicit operator ulong(AvroDecimal value) { return ToUInt64(value); } /// - /// Creates an unsigned long from a given . + /// Creates an from a given . /// - /// The . - /// An unsigned long. + /// The . + /// + /// An . + /// public static ulong ToUInt64(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator float(AvroDecimal value) { return ToSingle(value); } /// - /// Creates a double from a given . + /// Creates a from a given . /// - /// The . - /// A double. + /// The . + /// + /// A . + /// public static float ToSingle(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator double(AvroDecimal value) { return ToDouble(value); } /// - /// Creates a double from a given . + /// Creates a from a given . /// - /// The . - /// A double. + /// The . + /// + /// A . + /// public static double ToDouble(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator decimal(AvroDecimal value) { return ToDecimal(value); } /// - /// Creates a decimal from a given . + /// Creates a from a given . /// - /// The . - /// A decimal. + /// The . + /// + /// A . + /// public static decimal ToDecimal(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator BigInteger(AvroDecimal value) { return ToBigInteger(value); } /// - /// Creates a from a given . + /// Creates a from a given . /// - /// The . - /// A . + /// The . + /// + /// A . + /// public static BigInteger ToBigInteger(AvroDecimal value) { var scaleDivisor = BigInteger.Pow(new BigInteger(10), value.Scale); @@ -412,71 +688,157 @@ public static BigInteger ToBigInteger(AvroDecimal value) return scaledValue; } + /// + /// Performs an implicit conversion from to . + /// + /// The byte . + /// + /// An . + /// public static implicit operator AvroDecimal(byte value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(sbyte value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(short value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(int value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(long value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(ushort value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(uint value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(ulong value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(float value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(double value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(decimal value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(BigInteger value) { return new AvroDecimal(value, 0); - } + } /// - /// Converts the numeric value of the current to a given type. + /// Converts the numeric value of the current to a given type. /// - /// The type to which the value of the current should be converted. - /// A value of type converted from the current . + /// The type to which the value of the current should be converted. + /// + /// A value of type converted from the current . + /// public T ToType() where T : struct { @@ -484,19 +846,24 @@ public T ToType() } /// - /// Converts the numeric value of the current to a given type. + /// Converts the numeric value of the current to a given type. /// - /// The type to which the value of the current should be converted. + /// The type to which the value of the current should be converted. /// An System.IFormatProvider interface implementation that supplies culture-specific formatting information. - /// + /// + /// An instance of type conversionType whose value is equivalent to the value of this instance. + /// + /// The value {UnscaledValue} cannot fit into {conversionType.Name}. object IConvertible.ToType(Type conversionType, IFormatProvider provider) { var scaleDivisor = BigInteger.Pow(new BigInteger(10), Scale); var remainder = BigInteger.Remainder(UnscaledValue, scaleDivisor); var scaledValue = BigInteger.Divide(UnscaledValue, scaleDivisor); - if (scaledValue > new BigInteger(Decimal.MaxValue)) - throw new ArgumentOutOfRangeException("value", "The value " + UnscaledValue + " cannot fit into " + conversionType.Name + "."); + if (scaledValue > new BigInteger(decimal.MaxValue)) + { + throw new OverflowException($"The value {UnscaledValue} cannot fit into {conversionType.Name}."); + } var leftOfDecimal = (decimal)scaledValue; var rightOfDecimal = ((decimal)remainder) / ((decimal)scaleDivisor); @@ -506,251 +873,306 @@ object IConvertible.ToType(Type conversionType, IFormatProvider provider) } /// - /// Returns a value that indicates whether the current and a specified object + /// Returns a value that indicates whether the current and a specified object /// have the same value. /// /// The object to compare. - /// true if the obj argument is an object, and its value - /// is equal to the value of the current instance; otherwise false. + /// + /// true if the obj argument is an object, and its value + /// is equal to the value of the current instance; otherwise false. /// public override bool Equals(object obj) { - return (obj is AvroDecimal) && Equals((AvroDecimal)obj); + return (obj is AvroDecimal @decimal) && Equals(@decimal); } /// - /// Returns the hash code for the current . + /// Returns the hash code for the current . /// - /// The hash code. + /// + /// The hash code. + /// public override int GetHashCode() { return UnscaledValue.GetHashCode() ^ Scale.GetHashCode(); } /// - /// Returns the for the current . + /// Returns the for the current . /// - /// . + /// + /// The enumerated constant that is the of the class or value type that implements this interface. + /// TypeCode IConvertible.GetTypeCode() { return TypeCode.Object; } /// - /// Converts the current to a boolean. + /// Converts the current to a boolean. /// /// The format provider. - /// true or false, which reflects the value of the current . + /// + /// true or false, which reflects the value of the current . + /// bool IConvertible.ToBoolean(IFormatProvider provider) { return Convert.ToBoolean(this, provider); } /// - /// Converts the current to a byte. + /// Converts the current to a byte. /// /// The format provider. - /// A byte. + /// + /// A . + /// byte IConvertible.ToByte(IFormatProvider provider) { return Convert.ToByte(this, provider); } /// - /// Converts the current to a char. + /// Converts the current to a char. /// /// The format provider. - /// This method always throws an . + /// + /// This method always throws an . + /// + /// Cannot cast BigDecimal to Char. char IConvertible.ToChar(IFormatProvider provider) { throw new InvalidCastException("Cannot cast BigDecimal to Char"); } /// - /// Converts the current to a . + /// Converts the current to a . /// /// The format provider. - /// This method always throws an . + /// + /// This method always throws an . + /// + /// Cannot cast BigDecimal to DateTime. DateTime IConvertible.ToDateTime(IFormatProvider provider) { throw new InvalidCastException("Cannot cast BigDecimal to DateTime"); } /// - /// Converts the current to a decimal. + /// Converts the current to a decimal. /// /// The format provider. - /// A decimal. + /// + /// A . + /// decimal IConvertible.ToDecimal(IFormatProvider provider) { return Convert.ToDecimal(this, provider); } /// - /// Converts the current to a double. + /// Converts the current to a double. /// /// The format provider. - /// A double. + /// + /// A . + /// double IConvertible.ToDouble(IFormatProvider provider) { return Convert.ToDouble(this, provider); } /// - /// Converts the current to a short. + /// Converts the current to a short. /// /// The format provider. - /// A short. + /// + /// A . + /// short IConvertible.ToInt16(IFormatProvider provider) { return Convert.ToInt16(this, provider); } /// - /// Converts the current to an int. + /// Converts the current to an int. /// /// The format provider. - /// An int. + /// + /// An . + /// int IConvertible.ToInt32(IFormatProvider provider) { return Convert.ToInt32(this, provider); } /// - /// Converts the current to a long. + /// Converts the current to a long. /// /// The format provider. - /// A long. + /// + /// A . + /// long IConvertible.ToInt64(IFormatProvider provider) { return Convert.ToInt64(this, provider); } /// - /// Converts the current to a signed byte. + /// Converts the current to a signed byte. /// /// The format provider. - /// A signed byte. + /// + /// A . + /// sbyte IConvertible.ToSByte(IFormatProvider provider) { return Convert.ToSByte(this, provider); } /// - /// Converts the current to a float. + /// Converts the current to a float. /// /// The format provider. - /// A float. + /// + /// A . + /// float IConvertible.ToSingle(IFormatProvider provider) { return Convert.ToSingle(this, provider); } /// - /// Converts the current to a string. + /// Converts the current to a string. /// /// The format provider. - /// A string. + /// + /// A . + /// string IConvertible.ToString(IFormatProvider provider) { return Convert.ToString(this, provider); } /// - /// Converts the current to an unsigned short. + /// Converts the current to an unsigned short. /// /// The format provider. - /// An unsigned short. + /// + /// An . + /// ushort IConvertible.ToUInt16(IFormatProvider provider) { return Convert.ToUInt16(this, provider); } /// - /// Converts the current to an unsigned int. + /// Converts the current to an unsigned int. /// /// The format provider. - /// An unsigned int. + /// + /// An . + /// uint IConvertible.ToUInt32(IFormatProvider provider) { return Convert.ToUInt32(this, provider); } /// - /// Converts the current to an unsigned long. + /// Converts the current to an unsigned long. /// /// The format provider. - /// An unsigned long. + /// + /// An . + /// ulong IConvertible.ToUInt64(IFormatProvider provider) { return Convert.ToUInt64(this, provider); } /// - /// Converts the current to a string. + /// Converts the current to a string. /// - /// + /// The format. /// The format provider. - /// A string representation of the numeric value. + /// + /// A string representation of the numeric value. + /// public string ToString(string format, IFormatProvider formatProvider) { return ToString(); } /// - /// Compares the value of the current to the value of another object. + /// Compares the value of the current to the value of another object. /// /// The object to compare. - /// A value that indicates the relative order of the objects being compared. + /// + /// A value that indicates the relative order of the objects being compared. + /// + /// Compare to object must be a BigDecimal - obj. public int CompareTo(object obj) { if (obj == null) + { return 1; + } if (!(obj is AvroDecimal)) + { throw new ArgumentException("Compare to object must be a BigDecimal", nameof(obj)); + } return CompareTo((AvroDecimal)obj); } /// - /// Compares the value of the current to the value of another - /// . + /// Compares the value of the current to the value of another + /// . /// - /// The to compare. - /// A value that indicates the relative order of the - /// instances being compared. + /// The to compare. + /// + /// A value that indicates the relative order of the + /// instances being compared. + /// public int CompareTo(AvroDecimal other) { var unscaledValueCompare = UnscaledValue.CompareTo(other.UnscaledValue); var scaleCompare = Scale.CompareTo(other.Scale); - // if both are the same value, return the value - if (unscaledValueCompare == scaleCompare) - return unscaledValueCompare; - // if the scales are both the same return unscaled value if (scaleCompare == 0) + { return unscaledValueCompare; + } - var scaledValue = BigInteger.Divide(UnscaledValue, BigInteger.Pow(new BigInteger(10), Scale)); - var otherScaledValue = BigInteger.Divide(other.UnscaledValue, BigInteger.Pow(new BigInteger(10), other.Scale)); + var scaledValue = (decimal) UnscaledValue / (decimal) Math.Pow(10, Scale); + var otherScaledValue = (decimal) other.UnscaledValue / (decimal) Math.Pow(10, other.Scale); return scaledValue.CompareTo(otherScaledValue); } /// - /// Returns a value that indicates whether the current has the same - /// value as another . + /// Returns a value that indicates whether the current has the same + /// value as another . /// - /// The to compare. - /// true if the current has the same value as ; - /// otherwise false. + /// The to compare. + /// + /// true if the current has the same value as ; + /// otherwise false. + /// public bool Equals(AvroDecimal other) { return Scale == other.Scale && UnscaledValue == other.UnscaledValue; } + /// + /// Gets the bytes from decimal. + /// + /// The . + /// + /// A byte array. + /// private static byte[] GetBytesFromDecimal(decimal d) { byte[] bytes = new byte[16]; @@ -781,6 +1203,4 @@ private static byte[] GetBytesFromDecimal(decimal d) return bytes; } } - #pragma warning restore CA2225 // Operator overloads have named alternates - #pragma warning restore CS1591 // Missing XML comment for publicly visible type or member } diff --git a/lang/csharp/src/apache/main/CodeGen/CodeGen.cs b/lang/csharp/src/apache/main/CodeGen/CodeGen.cs index 70ab5bddc74..73b95852d7b 100644 --- a/lang/csharp/src/apache/main/CodeGen/CodeGen.cs +++ b/lang/csharp/src/apache/main/CodeGen/CodeGen.cs @@ -21,8 +21,10 @@ using System.Collections.Generic; using System.Globalization; using System.IO; +using System.Linq; using System.Reflection; using System.Text; +using System.Text.RegularExpressions; using Microsoft.CSharp; namespace Avro @@ -33,120 +35,180 @@ namespace Avro public class CodeGen { /// - /// Object that contains all the generated types + /// Gets object that contains all the generated types. /// + /// + /// The code compile unit. + /// public CodeCompileUnit CompileUnit { get; private set; } /// - /// List of schemas to generate code for + /// Gets list of schemas to generate code for. /// + /// + /// The schemas. + /// public IList Schemas { get; private set; } /// - /// List of protocols to generate code for + /// Gets list of protocols to generate code for. /// + /// + /// The protocols. + /// public IList Protocols { get; private set; } /// - /// Mapping of Avro namespaces to C# namespaces + /// Gets mapping of Avro namespaces to C# namespaces. /// + /// + /// The namespace mapping. + /// + [Obsolete("NamespaceMapping is not used, use AddProtocol(string ...) or AddSchema(string ...) instead!")] public IDictionary NamespaceMapping { get; private set; } /// - /// List of generated namespaces + /// Gets list of generated namespaces. /// - [Obsolete("Use NamespaceLookup instead. This will be removed from the public API in a future version.")] - protected Dictionary namespaceLookup = new Dictionary(StringComparer.Ordinal); + /// + /// The namespace lookup. + /// + protected Dictionary NamespaceLookup { get; private set; } /// - /// List of generated namespaces. + /// Initializes a new instance of the class. /// - protected Dictionary NamespaceLookup + public CodeGen() { -#pragma warning disable CS0618 // Type or member is obsolete - get => namespaceLookup; - set => namespaceLookup = value; -#pragma warning restore CS0618 // Type or member is obsolete + Schemas = new List(); + Protocols = new List(); + NamespaceLookup = new Dictionary(StringComparer.Ordinal); } /// - /// Default constructor + /// Initializes a new instance of the class. /// - public CodeGen() + /// The namespace lookup. + public CodeGen(Dictionary namespaceLookup) + : this() { - this.Schemas = new List(); - this.Protocols = new List(); - this.NamespaceMapping = new Dictionary(); + NamespaceLookup = namespaceLookup; } /// - /// Adds a protocol object to generate code for + /// Adds a protocol object to generate code for. /// - /// protocol object + /// The protocol. public virtual void AddProtocol(Protocol protocol) { Protocols.Add(protocol); } /// - /// Adds a schema object to generate code for + /// Parses and adds a protocol object to generate code for. /// - /// schema object + /// The protocol. + /// namespace mapping key value pairs. + public virtual void AddProtocol(string protocolText, IEnumerable> namespaceMapping = null) + { + // Map namespaces + protocolText = ReplaceMappedNamespacesInSchema(protocolText, namespaceMapping); + Protocol protocol = Protocol.Parse(protocolText); + Protocols.Add(protocol); + } + + /// + /// Adds a schema object to generate code for. + /// + /// schema object. public virtual void AddSchema(Schema schema) { Schemas.Add(schema); } /// - /// Adds a namespace object for the given name into the dictionary if it doesn't exist yet + /// Parses and adds a schema object to generate code for. /// - /// name of namespace - /// - protected virtual CodeNamespace addNamespace(string name) + /// schema object. + /// namespace mapping key value pairs. + public virtual void AddSchema(string schemaText, IEnumerable> namespaceMapping = null) + { + // Map namespaces + schemaText = ReplaceMappedNamespacesInSchema(schemaText, namespaceMapping); + Schema schema = Schema.Parse(schemaText); + Schemas.Add(schema); + } + + /// + /// Adds a namespace object for the given name into the dictionary if it doesn't exist yet. + /// + /// name of namespace. + /// + /// Code Namespace. + /// + /// name - name cannot be null. + protected virtual CodeNamespace AddNamespace(string name) { if (string.IsNullOrEmpty(name)) + { throw new ArgumentNullException(nameof(name), "name cannot be null."); + } - CodeNamespace ns = null; - - if (!NamespaceLookup.TryGetValue(name, out ns)) + if (!NamespaceLookup.TryGetValue(name, out CodeNamespace ns)) { - string csharpNamespace; - ns = NamespaceMapping.TryGetValue(name, out csharpNamespace) - ? new CodeNamespace(csharpNamespace) - : new CodeNamespace(CodeGenUtil.Instance.Mangle(name)); + ns = new CodeNamespace(CodeGenUtil.Instance.Mangle(name)); foreach (CodeNamespaceImport nci in CodeGenUtil.Instance.NamespaceImports) + { ns.Imports.Add(nci); + } CompileUnit.Namespaces.Add(ns); NamespaceLookup.Add(name, ns); } + return ns; } /// - /// Generates code for the given protocol and schema objects + /// Adds a namespace object for the given name into the dictionary if it doesn't exist yet. + /// + /// name of namespace. + /// + /// Code Namespace. + /// + /// name - name cannot be null. + [Obsolete("This method is deprecated and it will be removed in a future release! Please change call to AddNamespace(string name).")] + protected virtual CodeNamespace addNamespace(string name) + { + return AddNamespace(name); + } + + /// + /// Generates code for the given protocol and schema objects. /// - /// CodeCompileUnit object + /// + /// CodeCompileUnit object. + /// public virtual CodeCompileUnit GenerateCode() { CompileUnit = new CodeCompileUnit(); - processSchemas(); - processProtocols(); + ProcessSchemas(); + ProcessProtocols(); return CompileUnit; } /// - /// Generates code for the schema objects + /// Generates code for the schema objects. /// - protected virtual void processSchemas() + /// Names in schema should only be of type NamedSchema, type found " + sn.Value.Tag. + protected virtual void ProcessSchemas() { - foreach (Schema schema in this.Schemas) + foreach (Schema schema in Schemas) { - SchemaNames names = generateNames(schema); + SchemaNames names = GenerateNames(schema); foreach (KeyValuePair sn in names) { switch (sn.Value.Tag) @@ -163,13 +225,24 @@ protected virtual void processSchemas() } /// - /// Generates code for the protocol objects + /// Generates code for the schema objects. /// - protected virtual void processProtocols() + /// Names in schema should only be of type NamedSchema, type found " + sn.Value.Tag. + [Obsolete("This method is deprecated and it will be removed in a future release! Please change call to ProcessSchemas().")] + protected virtual void processSchemas() + { + ProcessSchemas(); + } + + /// + /// Generates code for the protocol objects. + /// + /// Names in protocol should only be of type NamedSchema, type found {sn.Value.Tag} + protected virtual void ProcessProtocols() { foreach (Protocol protocol in Protocols) { - SchemaNames names = generateNames(protocol); + SchemaNames names = GenerateNames(protocol); foreach (KeyValuePair sn in names) { switch (sn.Value.Tag) @@ -179,7 +252,7 @@ protected virtual void processProtocols() case Schema.Type.Record: processRecord(sn.Value); break; case Schema.Type.Error: processRecord(sn.Value); break; default: - throw new CodeGenException("Names in protocol should only be of type NamedSchema, type found " + sn.Value.Tag); + throw new CodeGenException($"Names in protocol should only be of type NamedSchema, type found {sn.Value.Tag}"); } } @@ -188,24 +261,74 @@ protected virtual void processProtocols() } /// - /// Generate list of named schemas from given protocol + /// Generates code for the protocol objects. /// - /// protocol to process - /// + /// Names in protocol should only be of type NamedSchema, type found {sn.Value.Tag} + [Obsolete("This method is deprecated and it will be removed in a future release! Please change call to ProcessProtocols().")] + protected virtual void processProtocols() + { + ProcessProtocols(); + } + + /// + /// Generate list of named schemas from given protocol. + /// + /// protocol to process. + /// + /// List of named schemas. + /// + /// protocol - Protocol can not be null. + [Obsolete("This method is deprecated and it will be removed in a future release! Please use GenerateNames() instead.")] protected virtual SchemaNames generateNames(Protocol protocol) { + return GenerateNames(protocol); + } + + /// + /// Generate list of named schemas from given protocol. + /// + /// protocol to process. + /// + /// List of named schemas. + /// + /// protocol - Protocol can not be null. + protected virtual SchemaNames GenerateNames(Protocol protocol) + { + if (protocol == null) + { + throw new ArgumentNullException(nameof(protocol), "Protocol can not be null"); + } + var names = new SchemaNames(); foreach (Schema schema in protocol.Types) + { addName(schema, names); + } + return names; } /// - /// Generate list of named schemas from given schema + /// Generate list of named schemas from given schema. /// - /// schema to process - /// + /// schema to process. + /// + /// List of named schemas. + /// + [Obsolete("This method is deprecated and it will be removed in a future release! Please use GenerateNames() instead.")] protected virtual SchemaNames generateNames(Schema schema) + { + return GenerateNames(schema); + } + + /// + /// Generate list of named schemas from given schema. + /// + /// schema to process. + /// + /// List of named schemas. + /// + protected virtual SchemaNames GenerateNames(Schema schema) { var names = new SchemaNames(); addName(schema, names); @@ -213,14 +336,18 @@ protected virtual SchemaNames generateNames(Schema schema) } /// - /// Recursively search the given schema for named schemas and adds them to the given container + /// Recursively search the given schema for named schemas and adds them to the given container. /// - /// schema object to search - /// list of named schemas + /// schema object to search. + /// list of named schemas. + /// Unable to add name for " + schema.Name + " type " + schema.Tag. protected virtual void addName(Schema schema, SchemaNames names) { NamedSchema ns = schema as NamedSchema; - if (null != ns) if (names.Contains(ns.SchemaName)) return; + if (ns != null && names.Contains(ns.SchemaName)) + { + return; + } switch (schema.Tag) { @@ -245,7 +372,10 @@ protected virtual void addName(Schema schema, SchemaNames names) var rs = schema as RecordSchema; names.Add(rs); foreach (Field field in rs.Fields) + { addName(field.Schema, names); + } + break; case Schema.Type.Array: @@ -261,7 +391,10 @@ protected virtual void addName(Schema schema, SchemaNames names) case Schema.Type.Union: var us = schema as UnionSchema; foreach (Schema usc in us.Schemas) + { addName(usc, names); + } + break; default: @@ -270,13 +403,21 @@ protected virtual void addName(Schema schema, SchemaNames names) } /// - /// Creates a class declaration for fixed schema + /// Creates a class declaration for fixed schema. /// - /// fixed schema + /// fixed schema. + /// + /// Unable to cast schema into a fixed + /// or + /// Namespace required for enum schema " + fixedSchema.Name. + /// protected virtual void processFixed(Schema schema) { FixedSchema fixedSchema = schema as FixedSchema; - if (null == fixedSchema) throw new CodeGenException("Unable to cast schema into a fixed"); + if (fixedSchema == null) + { + throw new CodeGenException("Unable to cast schema into a fixed"); + } CodeTypeDeclaration ctd = new CodeTypeDeclaration(); ctd.Name = CodeGenUtil.Instance.Mangle(fixedSchema.Name); @@ -284,6 +425,7 @@ protected virtual void processFixed(Schema schema) ctd.IsPartial = true; ctd.Attributes = MemberAttributes.Public; ctd.BaseTypes.Add("SpecificFixed"); + ctd.CustomAttributes.Add(CodeGenUtil.Instance.GeneratedCodeAttribute); if (fixedSchema.Documentation != null) { @@ -317,23 +459,35 @@ protected virtual void processFixed(Schema schema) string nspace = fixedSchema.Namespace; if (string.IsNullOrEmpty(nspace)) + { throw new CodeGenException("Namespace required for enum schema " + fixedSchema.Name); - CodeNamespace codens = addNamespace(nspace); + } + + CodeNamespace codens = AddNamespace(nspace); codens.Types.Add(ctd); } /// - /// Creates an enum declaration + /// Creates an enum declaration. /// - /// enum schema + /// enum schema. + /// + /// Unable to cast schema into an enum + /// or + /// Namespace required for enum schema " + enumschema.Name. + /// protected virtual void processEnum(Schema schema) { EnumSchema enumschema = schema as EnumSchema; - if (null == enumschema) throw new CodeGenException("Unable to cast schema into an enum"); + if (enumschema == null) + { + throw new CodeGenException("Unable to cast schema into an enum"); + } CodeTypeDeclaration ctd = new CodeTypeDeclaration(CodeGenUtil.Instance.Mangle(enumschema.Name)); ctd.IsEnum = true; ctd.Attributes = MemberAttributes.Public; + ctd.CustomAttributes.Add(CodeGenUtil.Instance.GeneratedCodeAttribute); if (enumschema.Documentation != null) { @@ -342,16 +496,17 @@ protected virtual void processEnum(Schema schema) foreach (string symbol in enumschema.Symbols) { - if (CodeGenUtil.Instance.ReservedKeywords.Contains(symbol)) - throw new CodeGenException("Enum symbol " + symbol + " is a C# reserved keyword"); CodeMemberField field = new CodeMemberField(typeof(int), symbol); ctd.Members.Add(field); } string nspace = enumschema.Namespace; if (string.IsNullOrEmpty(nspace)) + { throw new CodeGenException("Namespace required for enum schema " + enumschema.Name); - CodeNamespace codens = addNamespace(nspace); + } + + CodeNamespace codens = AddNamespace(nspace); codens.Types.Add(ctd); } @@ -360,6 +515,7 @@ protected virtual void processEnum(Schema schema) /// Generates code for an individual protocol. /// /// Protocol to generate code for. + /// Namespace required for enum schema " + nspace. protected virtual void processInterface(Protocol protocol) { // Create abstract class @@ -369,6 +525,7 @@ protected virtual void processInterface(Protocol protocol) ctd.TypeAttributes = TypeAttributes.Abstract | TypeAttributes.Public; ctd.IsClass = true; ctd.BaseTypes.Add("Avro.Specific.ISpecificProtocol"); + ctd.CustomAttributes.Add(CodeGenUtil.Instance.GeneratedCodeAttribute); AddProtocolDocumentation(protocol, ctd); @@ -394,15 +551,14 @@ protected virtual void processInterface(Protocol protocol) property.Type = new CodeTypeReference("Avro.Protocol"); property.HasGet = true; - property.GetStatements.Add(new CodeTypeReferenceExpression("return protocol")); ctd.Members.Add(property); - //var requestMethod = CreateRequestMethod(); - //ctd.Members.Add(requestMethod); - + // var requestMethod = CreateRequestMethod(); + // ctd.Members.Add(requestMethod); var requestMethod = CreateRequestMethod(); - //requestMethod.Attributes |= MemberAttributes.Override; + + // requestMethod.Attributes |= MemberAttributes.Override; var builder = new StringBuilder(); if (protocol.Messages.Count > 0) @@ -425,6 +581,7 @@ protected virtual void processInterface(Protocol protocol) builder.Append("\t\t\t}"); } + var cseGet = new CodeSnippetExpression(builder.ToString()); requestMethod.Statements.Add(cseGet); @@ -434,8 +591,11 @@ protected virtual void processInterface(Protocol protocol) string nspace = protocol.Namespace; if (string.IsNullOrEmpty(nspace)) + { throw new CodeGenException("Namespace required for enum schema " + nspace); - CodeNamespace codens = addNamespace(nspace); + } + + CodeNamespace codens = AddNamespace(nspace); codens.Types.Add(ctd); @@ -444,11 +604,9 @@ protected virtual void processInterface(Protocol protocol) ctd.TypeAttributes = TypeAttributes.Abstract | TypeAttributes.Public; ctd.IsClass = true; ctd.BaseTypes.Add(protocolNameMangled); + ctd.CustomAttributes.Add(CodeGenUtil.Instance.GeneratedCodeAttribute); // Need to override - - - AddProtocolDocumentation(protocol, ctd); AddMethods(protocol, true, ctd); @@ -456,29 +614,40 @@ protected virtual void processInterface(Protocol protocol) codens.Types.Add(ctd); } + /// + /// Creates the request method. + /// + /// A declaration for a method of a type. private static CodeMemberMethod CreateRequestMethod() { var requestMethod = new CodeMemberMethod(); requestMethod.Attributes = MemberAttributes.Public | MemberAttributes.Final; requestMethod.Name = "Request"; - requestMethod.ReturnType = new CodeTypeReference(typeof (void)); + requestMethod.ReturnType = new CodeTypeReference(typeof(void)); { - var requestor = new CodeParameterDeclarationExpression(typeof (Avro.Specific.ICallbackRequestor), + var requestor = new CodeParameterDeclarationExpression(typeof(Specific.ICallbackRequestor), "requestor"); requestMethod.Parameters.Add(requestor); - var messageName = new CodeParameterDeclarationExpression(typeof (string), "messageName"); + var messageName = new CodeParameterDeclarationExpression(typeof(string), "messageName"); requestMethod.Parameters.Add(messageName); - var args = new CodeParameterDeclarationExpression(typeof (object[]), "args"); + var args = new CodeParameterDeclarationExpression(typeof(object[]), "args"); requestMethod.Parameters.Add(args); - var callback = new CodeParameterDeclarationExpression(typeof (object), "callback"); + var callback = new CodeParameterDeclarationExpression(typeof(object), "callback"); requestMethod.Parameters.Add(callback); } + return requestMethod; } + /// + /// Adds the methods. + /// + /// The protocol. + /// if set to true [generate callback]. + /// The CTD. private static void AddMethods(Protocol protocol, bool generateCallback, CodeTypeDeclaration ctd) { foreach (var e in protocol.Messages) @@ -488,18 +657,22 @@ private static void AddMethods(Protocol protocol, bool generateCallback, CodeTyp var response = message.Response; if (generateCallback && message.Oneway.GetValueOrDefault()) + { continue; + } var messageMember = new CodeMemberMethod(); messageMember.Name = CodeGenUtil.Instance.Mangle(name); messageMember.Attributes = MemberAttributes.Public | MemberAttributes.Abstract; - if (message.Doc!= null && message.Doc.Trim() != string.Empty) + if (message.Doc != null && message.Doc.Trim() != string.Empty) + { messageMember.Comments.Add(new CodeCommentStatement(message.Doc)); + } if (message.Oneway.GetValueOrDefault() || generateCallback) { - messageMember.ReturnType = new CodeTypeReference(typeof (void)); + messageMember.ReturnType = new CodeTypeReference(typeof(void)); } else { @@ -528,11 +701,15 @@ private static void AddMethods(Protocol protocol, bool generateCallback, CodeTyp messageMember.Parameters.Add(parameter); } - ctd.Members.Add(messageMember); } } + /// + /// Adds the protocol documentation. + /// + /// The protocol. + /// The CTD. private void AddProtocolDocumentation(Protocol protocol, CodeTypeDeclaration ctd) { // Add interface documentation @@ -540,25 +717,41 @@ private void AddProtocolDocumentation(Protocol protocol, CodeTypeDeclaration ctd { var interfaceDoc = createDocComment(protocol.Doc); if (interfaceDoc != null) + { ctd.Comments.Add(interfaceDoc); + } } } /// - /// Creates a class declaration + /// Creates a class declaration. /// - /// record schema - /// A new class code type declaration + /// record schema. + /// + /// A new class code type declaration. + /// + /// + /// Unable to cast schema into a record + /// or + /// Namespace required for record schema " + recordSchema.Name. + /// protected virtual CodeTypeDeclaration processRecord(Schema schema) { RecordSchema recordSchema = schema as RecordSchema; - if (null == recordSchema) throw new CodeGenException("Unable to cast schema into a record"); + if (recordSchema == null) + { + throw new CodeGenException("Unable to cast schema into a record"); + } bool isError = recordSchema.Tag == Schema.Type.Error; // declare the class var ctd = new CodeTypeDeclaration(CodeGenUtil.Instance.Mangle(recordSchema.Name)); - ctd.BaseTypes.Add(isError ? "SpecificException" : "ISpecificRecord"); + var baseTypeReference = new CodeTypeReference( + isError ? typeof(Specific.SpecificException) : typeof(Specific.ISpecificRecord), + CodeTypeReferenceOptions.GlobalReference); + ctd.BaseTypes.Add(baseTypeReference); + ctd.CustomAttributes.Add(CodeGenUtil.Instance.GeneratedCodeAttribute); ctd.Attributes = MemberAttributes.Public; ctd.IsClass = true; @@ -609,7 +802,7 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) codeField.Attributes = MemberAttributes.Private; if (field.Schema is EnumSchema es && es.Default != null) { - codeField.InitExpression = new CodeTypeReferenceExpression($"{es.Name}.{es.Default}"); + codeField.InitExpression = new CodeTypeReferenceExpression($"{es.Namespace}.{es.Name}.{es.Default}"); } // Process field documentation if it exist and add to the field @@ -617,8 +810,10 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) if (!string.IsNullOrEmpty(field.Documentation)) { propertyComment = createDocComment(field.Documentation); - if (null != propertyComment) + if (propertyComment != null) + { codeField.Comments.Add(propertyComment); + } } // Add field to class @@ -635,8 +830,10 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) property.Type = ctrfield; property.GetStatements.Add(new CodeMethodReturnStatement(fieldRef)); property.SetStatements.Add(new CodeAssignStatement(fieldRef, new CodePropertySetValueReferenceExpression())); - if (null != propertyComment) + if (propertyComment != null) + { property.Comments.Add(propertyComment); + } // Add field property to class ctd.Members.Add(property); @@ -675,14 +872,14 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) } // end switch block for Get() - getFieldStmt.AppendLine("\t\t\tdefault: throw new AvroRuntimeException(\"Bad index \" + fieldPos + \" in Get()\");") + getFieldStmt.AppendLine("\t\t\tdefault: throw new global::Avro.AvroRuntimeException(\"Bad index \" + fieldPos + \" in Get()\");") .Append("\t\t\t}"); var cseGet = new CodeSnippetExpression(getFieldStmt.ToString()); cmmGet.Statements.Add(cseGet); ctd.Members.Add(cmmGet); // end switch block for Put() - putFieldStmt.AppendLine("\t\t\tdefault: throw new AvroRuntimeException(\"Bad index \" + fieldPos + \" in Put()\");") + putFieldStmt.AppendLine("\t\t\tdefault: throw new global::Avro.AvroRuntimeException(\"Bad index \" + fieldPos + \" in Put()\");") .Append("\t\t\t}"); var csePut = new CodeSnippetExpression(putFieldStmt.ToString()); cmmPut.Statements.Add(csePut); @@ -690,8 +887,11 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) string nspace = recordSchema.Namespace; if (string.IsNullOrEmpty(nspace)) + { throw new CodeGenException("Namespace required for record schema " + recordSchema.Name); - CodeNamespace codens = addNamespace(nspace); + } + + CodeNamespace codens = AddNamespace(nspace); codens.Types.Add(ctd); @@ -699,15 +899,30 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) } /// - /// Gets the string representation of the schema's data type + /// Gets the string representation of the schema's data type. /// - /// schema - /// flag to indicate union with null - /// - /// This method sets this value to indicate whether the enum is nullable. True indicates - /// that it is nullable. False indicates that it is not nullable. - /// - /// Name of the schema's C# type representation. + /// schema. + /// flag to indicate union with null. + /// This method sets this value to indicate whether the enum is nullable. True indicates + /// that it is nullable. False indicates that it is not nullable. + /// + /// Name of the schema's C# type representation. + /// + /// + /// Unable to cast schema into a named schema + /// or + /// Unable to cast schema into a named schema + /// or + /// Unable to cast schema into an array schema + /// or + /// Unable to cast schema into a map schema + /// or + /// Unable to cast schema into a union schema + /// or + /// Unable to cast schema into a logical schema + /// or + /// Unable to generate CodeTypeReference for " + schema.Name + " type " + schema.Tag. + /// internal static string getType(Schema schema, bool nullible, ref bool nullibleEnum) { switch (schema.Tag) @@ -715,20 +930,19 @@ internal static string getType(Schema schema, bool nullible, ref bool nullibleEn case Schema.Type.Null: return typeof(object).ToString(); case Schema.Type.Boolean: - if (nullible) return $"System.Nullable<{typeof(bool)}>"; - else return typeof(bool).ToString(); + return nullible ? $"System.Nullable<{typeof(bool)}>" : typeof(bool).ToString(); + case Schema.Type.Int: - if (nullible) return $"System.Nullable<{typeof(int)}>"; - else return typeof(int).ToString(); + return nullible ? $"System.Nullable<{typeof(int)}>" : typeof(int).ToString(); + case Schema.Type.Long: - if (nullible) return $"System.Nullable<{typeof(long)}>"; - else return typeof(long).ToString(); + return nullible ? $"System.Nullable<{typeof(long)}>" : typeof(long).ToString(); + case Schema.Type.Float: - if (nullible) return $"System.Nullable<{typeof(float)}>"; - else return typeof(float).ToString(); + return nullible ? $"System.Nullable<{typeof(float)}>" : typeof(float).ToString(); + case Schema.Type.Double: - if (nullible) return $"System.Nullable<{typeof(double)}>"; - else return typeof(double).ToString(); + return nullible ? $"System.Nullable<{typeof(double)}>" : typeof(double).ToString(); case Schema.Type.Bytes: return typeof(byte[]).ToString(); @@ -737,108 +951,132 @@ internal static string getType(Schema schema, bool nullible, ref bool nullibleEn case Schema.Type.Enumeration: var namedSchema = schema as NamedSchema; - if (null == namedSchema) + if (namedSchema == null) + { throw new CodeGenException("Unable to cast schema into a named schema"); + } + if (nullible) { nullibleEnum = true; return "System.Nullable<" + CodeGenUtil.Instance.Mangle(namedSchema.Fullname) + ">"; } - else return CodeGenUtil.Instance.Mangle(namedSchema.Fullname); + else + { + return CodeGenUtil.Instance.Mangle(namedSchema.Fullname); + } case Schema.Type.Fixed: case Schema.Type.Record: case Schema.Type.Error: namedSchema = schema as NamedSchema; - if (null == namedSchema) + if (namedSchema == null) + { throw new CodeGenException("Unable to cast schema into a named schema"); + } + return CodeGenUtil.Instance.Mangle(namedSchema.Fullname); case Schema.Type.Array: var arraySchema = schema as ArraySchema; - if (null == arraySchema) + if (arraySchema == null) + { throw new CodeGenException("Unable to cast schema into an array schema"); + } return "IList<" + getType(arraySchema.ItemSchema, false, ref nullibleEnum) + ">"; case Schema.Type.Map: var mapSchema = schema as MapSchema; - if (null == mapSchema) + if (mapSchema == null) + { throw new CodeGenException("Unable to cast schema into a map schema"); + } + return "IDictionary"; case Schema.Type.Union: var unionSchema = schema as UnionSchema; - if (null == unionSchema) + if (unionSchema == null) + { throw new CodeGenException("Unable to cast schema into a union schema"); - Schema nullibleType = getNullableType(unionSchema); - if (null == nullibleType) - return CodeGenUtil.Object; - else - return getType(nullibleType, true, ref nullibleEnum); + } + + Schema nullibleType = GetNullableType(unionSchema); + + return nullibleType == null ? CodeGenUtil.Object : getType(nullibleType, true, ref nullibleEnum); case Schema.Type.Logical: var logicalSchema = schema as LogicalSchema; - if (null == logicalSchema) - throw new CodeGenException("Unable to cast schema into a logical schema"); - var csharpType = logicalSchema.LogicalType.GetCSharpType(nullible); - if (csharpType.IsGenericType && csharpType.GetGenericTypeDefinition() == typeof(Nullable<>)) + if (logicalSchema == null) { - return $"System.Nullable<{csharpType.GetGenericArguments()[0]}>"; - } - else - { - return csharpType.ToString(); + throw new CodeGenException("Unable to cast schema into a logical schema"); } + var csharpType = logicalSchema.LogicalType.GetCSharpType(nullible); + return csharpType.IsGenericType && csharpType.GetGenericTypeDefinition() == typeof(Nullable<>) + ? $"System.Nullable<{csharpType.GetGenericArguments()[0]}>" : csharpType.ToString(); } + throw new CodeGenException("Unable to generate CodeTypeReference for " + schema.Name + " type " + schema.Tag); } /// - /// Gets the schema of a union with null + /// Gets the schema of a union with null. /// - /// union schema - /// schema that is nullible + /// union schema. + /// + /// schema that is nullable. + /// + /// schema - UnionSchema can not be null. + [Obsolete("This method is deprecated and it will be removed in a future release! Please use GetNullableType() instead.")] public static Schema getNullableType(UnionSchema schema) { - Schema ret = null; - if (schema.Count == 2) + return GetNullableType(schema); + } + + /// + /// Gets the schema of a union with null. + /// + /// union schema. + /// + /// schema that is nullable. + /// + /// schema - UnionSchema can not be null. + public static Schema GetNullableType(UnionSchema schema) + { + if (schema == null) { - bool nullable = false; - foreach (Schema childSchema in schema.Schemas) - { - if (childSchema.Tag == Schema.Type.Null) - nullable = true; - else - ret = childSchema; - } - if (!nullable) - ret = null; + throw new ArgumentNullException(nameof(schema), "UnionSchema can not be null"); } - return ret; + + if (schema.Count == 2 && !schema.Schemas.All(x => x.Tag != Schema.Type.Null)) + { + return schema.Schemas.FirstOrDefault(x => x.Tag != Schema.Type.Null); + } + + return null; } /// - /// Creates the static schema field for class types + /// Creates the static schema field for class types. /// - /// schema - /// CodeTypeDeclaration for the class - /// - /// Indicates whether we should add the to the - /// generated property. - /// + /// schema. + /// CodeTypeDeclaration for the class. + /// Indicates whether we should add the to the + /// generated property. protected virtual void createSchemaField(Schema schema, CodeTypeDeclaration ctd, bool overrideFlag) { // create schema field - var ctrfield = new CodeTypeReference("Schema"); + var ctrfield = new CodeTypeReference(typeof(Schema), CodeTypeReferenceOptions.GlobalReference); string schemaFname = "_SCHEMA"; var codeField = new CodeMemberField(ctrfield, schemaFname); codeField.Attributes = MemberAttributes.Public | MemberAttributes.Static; + // create function call Schema.Parse(json) var cpe = new CodePrimitiveExpression(schema.ToString()); var cmie = new CodeMethodInvokeExpression( - new CodeMethodReferenceExpression(new CodeTypeReferenceExpression(typeof(Schema)), "Parse"), + new CodeMethodReferenceExpression(new CodeTypeReferenceExpression(ctrfield), "Parse"), new CodeExpression[] { cpe }); codeField.InitExpression = cmie; ctd.Members.Add(codeField); @@ -846,7 +1084,11 @@ protected virtual void createSchemaField(Schema schema, CodeTypeDeclaration ctd, // create property to get static schema field var property = new CodeMemberProperty(); property.Attributes = MemberAttributes.Public; - if (overrideFlag) property.Attributes |= MemberAttributes.Override; + if (overrideFlag) + { + property.Attributes |= MemberAttributes.Override; + } + property.Name = "Schema"; property.Type = ctrfield; @@ -855,10 +1097,12 @@ protected virtual void createSchemaField(Schema schema, CodeTypeDeclaration ctd, } /// - /// Creates an XML documentation for the given comment + /// Creates an XML documentation for the given comment. /// - /// comment - /// CodeCommentStatement object + /// comment. + /// + /// a statement consisting of a single comment. + /// protected virtual CodeCommentStatement createDocComment(string comment) { string text = string.Format(CultureInfo.InvariantCulture, @@ -867,9 +1111,9 @@ protected virtual CodeCommentStatement createDocComment(string comment) } /// - /// Writes the generated compile unit into one file + /// Writes the generated compile unit into one file. /// - /// name of output file to write to + /// name of output file to write to. public virtual void WriteCompileUnit(string outputFile) { var cscp = new CSharpCodeProvider(); @@ -886,10 +1130,55 @@ public virtual void WriteCompileUnit(string outputFile) } /// - /// Writes each types in each namespaces into individual files + /// Gets names and generated code of the schema(s) types /// - /// name of directory to write to - public virtual void WriteTypes(string outputdir) + /// + public virtual IDictionary GetTypes() + { + using (var cscp = new CSharpCodeProvider()) + { + var opts = new CodeGeneratorOptions + { + BracingStyle = "C", IndentString = "\t", BlankLinesBetweenMembers = false + }; + CodeNamespaceCollection nsc = CompileUnit.Namespaces; + + var sourceCodeByName = new Dictionary(); + for (int i = 0; i < nsc.Count; i++) + { + var ns = nsc[i]; + + var new_ns = new CodeNamespace(ns.Name); + new_ns.Comments.Add(CodeGenUtil.Instance.FileComment); + foreach (CodeNamespaceImport nci in CodeGenUtil.Instance.NamespaceImports) + { + new_ns.Imports.Add(nci); + } + + var types = ns.Types; + for (int j = 0; j < types.Count; j++) + { + var ctd = types[j]; + using (var writer = new StringWriter()) + { + new_ns.Types.Add(ctd); + cscp.GenerateCodeFromNamespace(new_ns, writer, opts); + new_ns.Types.Remove(ctd); + sourceCodeByName[ctd.Name] = writer.ToString(); + } + } + } + + return sourceCodeByName; + } + } + + /// + /// Writes each types in each namespaces into individual files. + /// + /// name of directory to write to. + /// skip creation of directories based on schema namespace + public virtual void WriteTypes(string outputdir, bool skipDirectories = false) { var cscp = new CSharpCodeProvider(); @@ -904,16 +1193,21 @@ public virtual void WriteTypes(string outputdir) var ns = nsc[i]; string dir = outputdir; - foreach (string name in CodeGenUtil.Instance.UnMangle(ns.Name).Split('.')) + if (skipDirectories != true) { - dir = Path.Combine(dir, name); + foreach (string name in CodeGenUtil.Instance.UnMangle(ns.Name).Split('.')) + { + dir = Path.Combine(dir, name); + } } Directory.CreateDirectory(dir); var new_ns = new CodeNamespace(ns.Name); new_ns.Comments.Add(CodeGenUtil.Instance.FileComment); foreach (CodeNamespaceImport nci in CodeGenUtil.Instance.NamespaceImports) + { new_ns.Imports.Add(nci); + } var types = ns.Types; for (int j = 0; j < types.Count; j++) @@ -929,5 +1223,48 @@ public virtual void WriteTypes(string outputdir) } } } + + /// + /// Replace namespace(s) in schema or protocol definition. + /// + /// input schema or protocol definition. + /// namespace mappings object. + private static string ReplaceMappedNamespacesInSchema(string input, IEnumerable> namespaceMapping) + { + if (namespaceMapping == null || input == null) + return input; + + // Replace namespace in "namespace" definitions: + // "namespace": "originalnamespace" -> "namespace": "mappednamespace" + // "namespace": "originalnamespace.whatever" -> "namespace": "mappednamespace.whatever" + // Note: It keeps the original whitespaces + return Regex.Replace(input, @"""namespace""(\s*):(\s*)""([^""]*)""", m => + { + // m.Groups[1]: whitespaces before ':' + // m.Groups[2]: whitespaces after ':' + // m.Groups[3]: the namespace + + string ns = m.Groups[3].Value; + + foreach (var mapping in namespaceMapping) + { + // Full match + if (mapping.Key == ns) + { + ns = mapping.Value; + break; + } + else + // Partial match + if (ns.StartsWith($"{mapping.Key}.")) + { + ns = $"{mapping.Value}.{ns.Substring(mapping.Key.Length + 1)}"; + break; + } + } + + return $@"""namespace""{m.Groups[1].Value}:{m.Groups[2].Value}""{ns}"""; + }); + } } } diff --git a/lang/csharp/src/apache/main/CodeGen/CodeGenException.cs b/lang/csharp/src/apache/main/CodeGen/CodeGenException.cs index 485646fd033..1601a1e285d 100644 --- a/lang/csharp/src/apache/main/CodeGen/CodeGenException.cs +++ b/lang/csharp/src/apache/main/CodeGen/CodeGenException.cs @@ -19,17 +19,34 @@ namespace Avro { - class CodeGenException : AvroException + /// + /// CodeGen Exception. + /// + /// + public class CodeGenException : AvroException { + /// + /// Initializes a new instance of the class. + /// public CodeGenException() { } + /// + /// Initializes a new instance of the class. + /// + /// The message that describes the error. public CodeGenException(string s) : base(s) { } + /// + /// Initializes a new instance of the class. + /// + /// The message that describes the error. + /// The exception that is the cause of the current exception, or a null reference + /// if no inner exception is specified. public CodeGenException(string s, Exception inner) : base(s, inner) { diff --git a/lang/csharp/src/apache/main/CodeGen/CodeGenUtil.cs b/lang/csharp/src/apache/main/CodeGen/CodeGenUtil.cs index 54de067923d..1a720e16533 100644 --- a/lang/csharp/src/apache/main/CodeGen/CodeGenUtil.cs +++ b/lang/csharp/src/apache/main/CodeGen/CodeGenUtil.cs @@ -15,64 +15,94 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +using System.CodeDom; +using System.CodeDom.Compiler; using System.Collections.Generic; +using System.Reflection; using System.Text; -using System.CodeDom; namespace Avro { /// - /// A singleton class containing data used by codegen + /// A singleton class containing data used by codegen. /// public sealed class CodeGenUtil { /// - /// Singleton instance of this class. + /// Gets singleton instance of this class. /// + /// + /// The instance. + /// public static CodeGenUtil Instance { get; } = new CodeGenUtil(); /// - /// Namespaces to import in generated code. + /// Gets namespaces to import in generated code. /// + /// + /// The namespace imports. + /// public CodeNamespaceImport[] NamespaceImports { get; private set; } /// - /// Comment included at the top of each generated code file. + /// Gets comment included at the top of each generated code file. /// + /// + /// The file comment. + /// public CodeCommentStatement FileComment { get; private set; } /// - /// Reserved keywords in the C# language. + /// Gets reserved keywords in the C# language. /// + /// + /// The reserved keywords. + /// public HashSet ReservedKeywords { get; private set; } + /// + /// Gets the generated code attribute. + /// + /// + /// The generated code attribute. + /// + public CodeAttributeDeclaration GeneratedCodeAttribute { get; private set; } + private const char At = '@'; private const char Dot = '.'; + private readonly string _assemblyInformationVersion = GetInformationalVersion(); /// - /// Fully-qualified name of a type. + /// Fully-qualified name of a type. /// public const string Object = "System.Object"; + /// + /// Prevents a default instance of the class from being created. + /// private CodeGenUtil() { NamespaceImports = new CodeNamespaceImport[] { new CodeNamespaceImport("System"), new CodeNamespaceImport("System.Collections.Generic"), new CodeNamespaceImport("System.Text"), - new CodeNamespaceImport("Avro"), - new CodeNamespaceImport("Avro.Specific") }; + new CodeNamespaceImport("global::Avro"), + new CodeNamespaceImport("global::Avro.Specific") }; FileComment = new CodeCommentStatement( @"------------------------------------------------------------------------------ - Generated by " + System.AppDomain.CurrentDomain.FriendlyName + ", version " + System.Reflection.Assembly.GetExecutingAssembly().GetName().Version + @" + Generated by " + System.AppDomain.CurrentDomain.FriendlyName + ", version " + _assemblyInformationVersion + @" Changes to this file may cause incorrect behavior and will be lost if code is regenerated ------------------------------------------------------------------------------"); // Visual Studio 2010 https://msdn.microsoft.com/en-us/library/x53a06bb.aspx + // Note: + // 1. Contextual keywords are not reserved keywords e.g. value, partial + // 2. __arglist, __makeref, __reftype, __refvalue are undocumented keywords, but recognized by the C# compiler ReservedKeywords = new HashSet() { "abstract","as", "base", "bool", "break", "byte", "case", "catch", "char", "checked", "class", "const", "continue", "decimal", "default", "delegate", "do", "double", "else", "enum", "event", @@ -81,14 +111,17 @@ is regenerated "null", "object", "operator", "out", "override", "params", "private", "protected", "public", "readonly", "ref", "return", "sbyte", "sealed", "short", "sizeof", "stackalloc", "static", "string", "struct", "switch", "this", "throw", "true", "try", "typeof", "uint", "ulong", - "unchecked", "unsafe", "ushort", "using", "virtual", "void", "volatile", "while", "value", "partial" }; + "unchecked", "unsafe", "ushort", "using", "virtual", "void", "volatile", "while", + "__arglist", "__makeref", "__reftype", "__refvalue" }; + + GeneratedCodeAttribute = GetGeneratedCodeAttribute(); } /// - /// Append @ to all reserved keywords that appear on the given name + /// Append @ to all reserved keywords that appear on the given name. /// - /// - /// + /// The name. + /// updated string. public string Mangle(string name) { var builder = new StringBuilder(); @@ -105,10 +138,10 @@ public string Mangle(string name) } /// - /// Remove all the @ + /// Remove all the @. /// - /// - /// + /// The name. + /// updated string. public string UnMangle(string name) { var builder = new StringBuilder(name.Length); @@ -117,5 +150,32 @@ public string UnMangle(string name) builder.Append(name[i]); return builder.ToString(); } + + private CodeAttributeDeclaration GetGeneratedCodeAttribute() + { + GeneratedCodeAttribute generatedCodeAttribute = + new GeneratedCodeAttribute(System.AppDomain.CurrentDomain.FriendlyName, + _assemblyInformationVersion); + + CodePrimitiveExpression tool = new CodePrimitiveExpression(generatedCodeAttribute.Tool); + CodePrimitiveExpression version = new CodePrimitiveExpression(generatedCodeAttribute.Version); + + CodeAttributeDeclaration codeAttributeDeclaration = + new CodeAttributeDeclaration($"global::{generatedCodeAttribute.GetType().FullName}", + new CodeAttributeArgument(tool), + new CodeAttributeArgument(version)); + + return codeAttributeDeclaration; + } + + private static string GetInformationalVersion() + { + System.Reflection.AssemblyInformationalVersionAttribute attribute = + (System.Reflection.AssemblyInformationalVersionAttribute) + System.Reflection.Assembly.GetExecutingAssembly() + .GetCustomAttribute(typeof(System.Reflection.AssemblyInformationalVersionAttribute)); + + return attribute.InformationalVersion; + } } } diff --git a/lang/csharp/src/apache/main/File/Codec.cs b/lang/csharp/src/apache/main/File/Codec.cs index f8667f7cccf..46191997a1d 100644 --- a/lang/csharp/src/apache/main/File/Codec.cs +++ b/lang/csharp/src/apache/main/File/Codec.cs @@ -16,7 +16,10 @@ * limitations under the License. */ +using System; +using System.Collections.Generic; using System.IO; +using System.Reflection; namespace Avro.File { @@ -27,102 +30,199 @@ namespace Avro.File public abstract class Codec { /// - /// Compress data using implemented codec + /// Compress data using implemented codec. /// - /// - /// - abstract public byte[] Compress(byte[] uncompressedData); + /// The uncompressed data. + /// + /// byte array. + /// + public abstract byte[] Compress(byte[] uncompressedData); /// - /// Compress data using implemented codec + /// Compress data using implemented codec. /// - /// The stream which contains the data to be compressed + /// The stream which contains the data to be compressed. /// A reusable stream which will hold the compressed data. That stream should be empty. - abstract public void Compress(MemoryStream inputStream, MemoryStream outputStream); + public abstract void Compress(MemoryStream inputStream, MemoryStream outputStream); + + /// + /// Decompress data using implemented codec. + /// + /// The buffer holding data to decompress. + /// A byte array holding the decompressed data. + [Obsolete] + public virtual byte[] Decompress(byte[] compressedData) + { + return Decompress(compressedData, compressedData.Length); + } /// /// Decompress data using implemented codec /// - /// - /// - abstract public byte[] Decompress(byte[] compressedData); + /// The buffer holding data to decompress. + /// The actual length of bytes to decompress from the buffer. + /// A byte array holding the decompressed data. + public abstract byte[] Decompress(byte[] compressedData, int length); /// - /// Name of this codec type + /// Name of this codec type. /// - /// - abstract public string GetName(); + /// The codec name. + public abstract string GetName(); /// - /// Codecs must implement an equals() method + /// Codecs must implement an equals() method. /// - /// - /// - abstract public override bool Equals(object other); + /// The to compare with this instance. + /// + /// true if the specified is equal to this instance; otherwise, false. + /// + public abstract override bool Equals(object other); /// /// Codecs must implement a HashCode() method that is - /// consistent with Equals + /// consistent with Equals. /// - /// - abstract public override int GetHashCode(); + /// + /// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table. + /// + public abstract override int GetHashCode(); /// - /// Codec types + /// Codec types. /// public enum Type { /// - /// Codec type that implments the "deflate" compression algorithm. + /// Codec type that implements the "deflate" compression algorithm. /// Deflate, - //Snappy - /// /// Codec that does not perform any compression. /// - Null - }; + Null, + + /// + /// Codec type that implements the "Snappy" compression algorithm. + /// + Snappy, + + /// + /// Codec type that implements the "BZip2" compression algorithm. + /// + BZip2, + + /// + /// Codec type that implements the "XZ" compression algorithm. + /// + XZ, + + /// + /// Codec type that implements the "Zstandard" compression algorithm. + /// + Zstandard + } + + /// + /// Represents a function capable of resolving a codec string + /// with a matching codec implementation a reader can use to decompress data. + /// + /// The codec string + public delegate Codec CodecResolver(string codecMetaString); + + /// + /// The codec resolvers + /// + private static readonly List _codecResolvers = new List(); + + /// + /// Registers a function that will attempt to resolve a codec identifying string + /// with a matching codec implementation when reading compressed Avro data. + /// + /// A function that is able to find a codec implementation for a given codec string + public static void RegisterResolver(CodecResolver resolver) + { + _codecResolvers.Add(resolver); + } /// - /// Factory method to return child - /// codec instance based on Codec.Type + /// Factory method to return child codec instance based on Codec.Type. /// - /// - /// + /// Type of the codec. + /// + /// Codec based on type. + /// public static Codec CreateCodec(Type codecType) { switch (codecType) { case Type.Deflate: return new DeflateCodec(); - default: + case Type.Null: return new NullCodec(); + case Type.Snappy: + case Type.BZip2: + case Type.XZ: + case Type.Zstandard: + { + // Create codec dynamically from "Avro.File.CODECNAME" assembly + Assembly assembly = Assembly.Load($"Avro.File.{codecType}"); + return assembly.CreateInstance($"Avro.File.{codecType}.{codecType}Codec") as Codec; + } } + + throw new AvroRuntimeException($"Unrecognized codec: {codecType}"); } /// - /// Factory method to return child - /// codec instance based on string type + /// Factory method to return child codec instance based on string type. /// - /// - /// + /// Type of the codec. + /// Codec based on type. public static Codec CreateCodecFromString(string codecType) { + if (codecType == null) + { + // If codec is absent, it is assumed to be "null" + // https://avro.apache.org/docs/current/spec.html + return CreateCodec(Type.Null); + } + + foreach (var resolver in _codecResolvers) + { + var candidateCodec = resolver(codecType); + if (candidateCodec != null) + { + return candidateCodec; + } + } + switch (codecType) { case DataFileConstants.DeflateCodec: - return new DeflateCodec(); - default: - return new NullCodec(); + return CreateCodec(Type.Deflate); + case DataFileConstants.NullCodec: + return CreateCodec(Type.Null); + case DataFileConstants.SnappyCodec: + return CreateCodec(Type.Snappy); + case DataFileConstants.BZip2Codec: + return CreateCodec(Type.BZip2); + case DataFileConstants.XZCodec: + return CreateCodec(Type.XZ); + case DataFileConstants.ZstandardCodec: + return CreateCodec(Type.Zstandard); } + + throw new AvroRuntimeException($"Unrecognized codec: {codecType}"); } /// - /// Returns name of codec + /// Returns name of codec. /// - /// + /// + /// A that represents this instance. + /// public override string ToString() { return GetName(); diff --git a/lang/csharp/src/apache/main/File/DataBlock.cs b/lang/csharp/src/apache/main/File/DataBlock.cs deleted file mode 100644 index 7fd85e7bb15..00000000000 --- a/lang/csharp/src/apache/main/File/DataBlock.cs +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -using System; -using System.IO; - -namespace Avro.File -{ - /// - /// Encapsulates a block of data read by the . - /// We will remove this class from the public API in a future version because it is only meant - /// to be used internally. - /// - [Obsolete("This will be removed from the public API in a future version.")] - public class DataBlock - { - /// - /// Raw bytes within this block. - /// - public byte[] Data { get; set; } - - /// - /// Number of entries in this block. - /// - public long NumberOfEntries { get; set; } - - /// - /// Size of this block in bytes. - /// - public long BlockSize { get; set; } - - /// - /// Initializes a new instance of the class. - /// - /// Number of entries in this block. - /// Size of this block in bytes. - public DataBlock(long numberOfEntries, long blockSize) - { - NumberOfEntries = numberOfEntries; - BlockSize = blockSize; - Data = new byte[blockSize]; - } - - internal Stream GetDataAsStream() - { - return new MemoryStream(Data); - } - } -} diff --git a/lang/csharp/src/apache/main/File/DataFileConstants.cs b/lang/csharp/src/apache/main/File/DataFileConstants.cs index 27b034e7ac2..438caeca0e4 100644 --- a/lang/csharp/src/apache/main/File/DataFileConstants.cs +++ b/lang/csharp/src/apache/main/File/DataFileConstants.cs @@ -21,9 +21,6 @@ namespace Avro.File /// /// Constants used in data files. /// - [System.Diagnostics.CodeAnalysis.SuppressMessage("Design", - "CA1052:Static holder types should be Static or NotInheritable", - Justification = "Maintain public API")] public class DataFileConstants { /// @@ -51,6 +48,26 @@ public class DataFileConstants /// public const string DeflateCodec = "deflate"; + /// + /// Identifier for the Snappy codec. + /// + public const string SnappyCodec = "snappy"; + + /// + /// Identifier for the BZip2 codec. + /// + public const string BZip2Codec = "bzip2"; + + /// + /// Identifier for the XZ codec. + /// + public const string XZCodec = "xz"; + + /// + /// Identifier for the Zstandard codec. + /// + public const string ZstandardCodec = "zstandard"; + /// /// Reserved 'avro' metadata key. /// @@ -64,10 +81,13 @@ public class DataFileConstants /// /// Magic bytes at the beginning of an Avro data file. /// - public static byte[] Magic = { (byte)'O', - (byte)'b', - (byte)'j', - Version }; + public static readonly byte[] Magic = + { + (byte)'O', + (byte)'b', + (byte)'j', + Version, + }; /// /// Hash code for the null codec. diff --git a/lang/csharp/src/apache/main/File/DataFileReader.cs b/lang/csharp/src/apache/main/File/DataFileReader.cs index f065dbb9fa7..dff13e05885 100644 --- a/lang/csharp/src/apache/main/File/DataFileReader.cs +++ b/lang/csharp/src/apache/main/File/DataFileReader.cs @@ -27,18 +27,21 @@ namespace Avro.File { /// - /// Provides access to Avro data written using the . + /// Provides access to Avro data written using the . /// /// Type to deserialze data objects to. + /// public class DataFileReader : IFileReader { /// - /// Defines the signature for a function that returns a new + /// Defines the signature for a function that returns a new /// given a writer and reader schema. /// /// Schema used to write the datum. /// Schema used to read the datum. - /// A datum reader. + /// + /// A datum reader. + /// public delegate DatumReader CreateDatumReader(Schema writerSchema, Schema readerSchema); private DatumReader _reader; @@ -52,101 +55,124 @@ public class DataFileReader : IFileReader private byte[] _syncBuffer; private long _blockStart; private Stream _stream; - private bool _leaveOpen; - private Schema _readerSchema; + private readonly bool _leaveOpen; + private readonly Schema _readerSchema; private readonly CreateDatumReader _datumReaderFactory; /// - /// Open a reader for a file using path + /// Open a reader for a file using path. /// - /// - /// + /// The path. + /// + /// File Reader. + /// public static IFileReader OpenReader(string path) { return OpenReader(new FileStream(path, FileMode.Open), null); } /// - /// Open a reader for a file using path and the reader's schema + /// Open a reader for a file using path and the reader's schema. /// - /// Path to the file - /// Schema used to read data from the file - /// A new file reader + /// Path to the file. + /// Schema used to read data from the file. + /// + /// A new file reader. + /// public static IFileReader OpenReader(string path, Schema readerSchema) { return OpenReader(new FileStream(path, FileMode.Open), readerSchema); } /// - /// Open a reader for a stream + /// Open a reader for a stream. /// - /// - /// + /// The in stream. + /// + /// File Reader. + /// public static IFileReader OpenReader(Stream inStream) { return OpenReader(inStream, null); } /// - /// Open a reader for a stream + /// Open a reader for a stream. /// - /// - /// Leave the stream open after disposing the object - /// + /// The in stream. + /// Leave the stream open after disposing the object. + /// + /// File Reader. + /// public static IFileReader OpenReader(Stream inStream, bool leaveOpen) { return OpenReader(inStream, null, leaveOpen); } /// - /// Open a reader for a stream using the reader's schema + /// Open a reader for a stream using the reader's schema. /// - /// Stream containing the file contents - /// Schema used to read the file - /// A new file reader + /// Stream containing the file contents. + /// Schema used to read the file. + /// + /// A new file reader. + /// public static IFileReader OpenReader(Stream inStream, Schema readerSchema) { return OpenReader(inStream, readerSchema, CreateDefaultReader); } /// - /// Open a reader for a stream using the reader's schema + /// Open a reader for a stream using the reader's schema. /// - /// Stream containing the file contents - /// Schema used to read the file - /// Leave the stream open after disposing the object - /// A new file reader + /// Stream containing the file contents. + /// Schema used to read the file. + /// Leave the stream open after disposing the object. + /// + /// A new file reader. + /// public static IFileReader OpenReader(Stream inStream, Schema readerSchema, bool leaveOpen) { return OpenReader(inStream, readerSchema, CreateDefaultReader, leaveOpen); } - + /// - /// Open a reader for a stream using the reader's schema and a custom DatumReader + /// Open a reader for a stream using the reader's schema and a custom DatumReader. /// - /// Stream of file contents - /// Schema used to read the file - /// Factory to create datum readers given a reader an writer schema - /// A new file reader + /// Stream of file contents. + /// Schema used to read the file. + /// Factory to create datum readers given a reader an writer schema. + /// + /// A new file reader. + /// public static IFileReader OpenReader(Stream inStream, Schema readerSchema, CreateDatumReader datumReaderFactory) { return new DataFileReader(inStream, readerSchema, datumReaderFactory, false); // (not supporting 1.2 or below, format) } /// - /// Open a reader for a stream using the reader's schema and a custom DatumReader + /// Open a reader for a stream using the reader's schema and a custom DatumReader. /// - /// Stream of file contents - /// Schema used to read the file - /// Factory to create datum readers given a reader an writer schema - /// Leave the stream open after disposing the object - /// A new file reader + /// Stream of file contents. + /// Schema used to read the file. + /// Factory to create datum readers given a reader an writer schema. + /// Leave the stream open after disposing the object. + /// + /// A new file reader. + /// public static IFileReader OpenReader(Stream inStream, Schema readerSchema, CreateDatumReader datumReaderFactory, bool leaveOpen) { return new DataFileReader(inStream, readerSchema, datumReaderFactory, leaveOpen); // (not supporting 1.2 or below, format) } - DataFileReader(Stream stream, Schema readerSchema, CreateDatumReader datumReaderFactory, bool leaveOpen) + /// + /// Initializes a new instance of the class. + /// + /// The stream. + /// The reader schema. + /// The datum reader factory. + /// if set to true [leave open]. + private DataFileReader(Stream stream, Schema readerSchema, CreateDatumReader datumReaderFactory, bool leaveOpen) { _readerSchema = readerSchema; _datumReaderFactory = datumReaderFactory; @@ -228,6 +254,7 @@ public void Seek(long position) public void Sync(long position) { Seek(position); + // work around an issue where 1.5.4 C stored sync in metadata if ((position == 0) && (GetMeta(DataFileConstants.MetaDataSync) != null)) { @@ -307,7 +334,7 @@ public bool HasNext() if (HasNextBlock()) { _currentBlock = NextRawBlock(_currentBlock); - _currentBlock.Data = _codec.Decompress(_currentBlock.Data); + _currentBlock.Data = _codec.Decompress(_currentBlock.Data, (int)_blockSize); _datumDecoder = new BinaryDecoder(_currentBlock.GetDataAsStream()); } } @@ -350,6 +377,15 @@ protected virtual void Dispose(bool disposing) _stream.Dispose(); } + /// + /// Initializes the specified stream. + /// + /// The stream. + /// + /// Not a valid data file! + /// or + /// Not a valid data file!. + /// private void Init(Stream stream) { _stream = stream; @@ -394,6 +430,14 @@ private void Init(Stream stream) _codec = ResolveCodec(); } + /// + /// Creates the default reader. + /// + /// The writer schema. + /// The reader schema. + /// + /// Datum Reader. + /// private static DatumReader CreateDefaultReader(Schema writerSchema, Schema readerSchema) { DatumReader reader = null; @@ -410,9 +454,21 @@ private static DatumReader CreateDefaultReader(Schema writerSchema, Schema re return reader; } + /// + /// Resolves the codec. + /// + /// + /// Resolved codec. + /// private Codec ResolveCodec() { - return Codec.CreateCodecFromString(GetMetaString(DataFileConstants.MetaDataCodec)); + string codec = GetMetaString(DataFileConstants.MetaDataCodec); + + // If codec is absent, it is assumed to be "null" + if (codec == null) + return Codec.CreateCodec(Codec.Type.Null); + + return Codec.CreateCodecFromString(codec); } /// @@ -421,6 +477,15 @@ public T Next() return Next(default(T)); } + /// + /// Reads the next datum from the file. + /// + /// The reuse. + /// Next deserialized data entry. + /// No more datum objects remaining in block! + /// or + /// Error fetching next object from block: {0}. + /// private T Next(T reuse) { try @@ -442,12 +507,25 @@ private T Next(T reuse) } } + /// + /// Ends the stream for the block. + /// private void BlockFinished() { if (_stream.CanSeek) _blockStart = _stream.Position; } + /// + /// Reads the Next block from the file. + /// + /// The reuse. + /// Data Block. + /// + /// No data remaining in block! + /// or + /// Invalid sync!. + /// private DataBlock NextRawBlock(DataBlock reuse) { if (!HasNextBlock()) @@ -473,6 +551,10 @@ private DataBlock NextRawBlock(DataBlock reuse) return reuse; } + /// + /// Evaluates if there is data left in the stream. + /// + /// True if there is data left in the stream, otherwise false. private bool DataLeft() { long currentPosition = _stream.Position; @@ -484,6 +566,17 @@ private bool DataLeft() return true; } + /// + /// Determines whether [has next block]. + /// + /// + /// true if [has next block]; otherwise, false. + /// + /// + /// Block size invalid or too large for this implementation: " + _blockSize + /// or + /// Error ascertaining if data has next block: {0}. + /// private bool HasNextBlock() { try @@ -508,14 +601,14 @@ private bool HasNextBlock() { _blockRemaining = _decoder.ReadLong(); // read block count } - catch(AvroException) + catch (AvroException) { return false; } } _blockSize = _decoder.ReadLong(); // read block size - if (_blockSize > System.Int32.MaxValue || _blockSize < 0) + if (_blockSize > int.MaxValue || _blockSize < 0) { throw new AvroRuntimeException("Block size invalid or too large for this " + "implementation: " + _blockSize); @@ -531,27 +624,37 @@ private bool HasNextBlock() } /// - /// Encapsulates a block of data read by the . + /// Encapsulates a block of data read by the . /// + /// private class DataBlock { /// - /// Raw bytes within this block. + /// Gets or sets raw bytes within this block. /// - public byte[] Data { get; set; } + /// + /// The data. + /// + public byte[] Data { get; set; } /// - /// Number of entries in this block. + /// Gets or sets number of entries in this block. /// + /// + /// The number of entries. + /// public long NumberOfEntries { get; set; } /// - /// Size of this block in bytes. + /// Gets or sets size of this block in bytes. /// + /// + /// The size of the block. + /// public long BlockSize { get; set; } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// Number of entries in this block. /// Size of this block in bytes. @@ -562,6 +665,10 @@ public DataBlock(long numberOfEntries, long blockSize) Data = new byte[blockSize]; } + /// + /// Gets the data as stream. + /// + /// A stream. internal Stream GetDataAsStream() { return new MemoryStream(Data); diff --git a/lang/csharp/src/apache/main/File/DataFileWriter.cs b/lang/csharp/src/apache/main/File/DataFileWriter.cs index 2a1dd7125de..82ac3a9da8b 100644 --- a/lang/csharp/src/apache/main/File/DataFileWriter.cs +++ b/lang/csharp/src/apache/main/File/DataFileWriter.cs @@ -27,11 +27,12 @@ namespace Avro.File /// /// Stores in a file a sequence of data conforming to a schema. The schema is stored in the file /// with the data. Each datum in a file is of the same schema. Data is written with a - /// . Data is grouped into blocks. A synchronization marker is + /// . Data is grouped into blocks. A synchronization marker is /// written between blocks, so that files may be split. Blocks may be compressed. Extensible /// metadata is stored at the end of the file. Files may be appended to. /// /// Type of datum to write to the file. + /// public class DataFileWriter : IFileWriter { private Schema _schema; @@ -56,7 +57,9 @@ public class DataFileWriter : IFileWriter /// /// Datum writer to use. /// Path to the file. - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, string path) { return OpenWriter(writer, new FileStream(path, FileMode.Create), Codec.CreateCodec(Codec.Type.Null)); @@ -68,7 +71,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, string path) /// /// Datum writer to use. /// Stream to write to. - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream) { return OpenWriter(writer, outStream, Codec.CreateCodec(Codec.Type.Null)); @@ -81,7 +86,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream) /// Datum writer to use. /// Stream to write to. /// Leave the stream open after disposing the object - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, bool leaveOpen) { return OpenWriter(writer, outStream, Codec.CreateCodec(Codec.Type.Null), leaveOpen); @@ -94,7 +101,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, /// Datum writer to use. /// Path to the file. /// Codec to use when writing. - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, string path, Codec codec) { return OpenWriter(writer, new FileStream(path, FileMode.Create), codec); @@ -107,7 +116,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, string path, Code /// Datum writer to use. /// Stream to write to. /// Codec to use when writing. - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, Codec codec) { return new DataFileWriter(writer).Create(writer.Schema, outStream, codec, false); @@ -121,7 +132,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, /// Stream to write to. /// Codec to use when writing. /// Leave the stream open after disposing the object - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, Codec codec, bool leaveOpen) { return new DataFileWriter(writer).Create(writer.Schema, outStream, codec, leaveOpen); @@ -132,7 +145,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, /// /// Datum writer to use. /// Path to the file. - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenAppendWriter(DatumWriter writer, string path) { return new DataFileWriter(writer).AppendTo(path); @@ -145,7 +160,14 @@ public static IFileWriter OpenAppendWriter(DatumWriter writer, string path /// Datum writer to use. /// reading the existing file. /// stream to write to, positioned at the end of the existing file. - /// A new file writer. + /// + /// A new file writer. + /// + /// + /// {nameof(inStream)} must have Read access + /// or + /// {nameof(outStream)} must have Write access + /// public static IFileWriter OpenAppendWriter(DatumWriter writer, Stream inStream, Stream outStream) { if (!inStream.CanRead) @@ -161,6 +183,10 @@ public static IFileWriter OpenAppendWriter(DatumWriter writer, Stream inSt return new DataFileWriter(writer).AppendTo(inStream, outStream); } + /// + /// Initializes a new instance of the class. + /// + /// The writer. private DataFileWriter(DatumWriter writer) { _writer = writer; @@ -240,6 +266,11 @@ public void Append(T datum) WriteIfBlockFull(); } + /// + /// Appends to file. + /// + /// The path. + /// a file writer private IFileWriter AppendTo(string path) { using (var inStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -252,6 +283,12 @@ private IFileWriter AppendTo(string path) // of this writer. } + /// + /// Appends to stream. + /// + /// The in stream. + /// The out stream. + /// private IFileWriter AppendTo(Stream inStream, Stream outStream) { using (var dataFileReader = DataFileReader.OpenReader(inStream)) @@ -281,6 +318,9 @@ private IFileWriter AppendTo(Stream inStream, Stream outStream) return this; } + /// + /// Ensures the header. + /// private void EnsureHeader() { if (!_headerWritten) @@ -304,6 +344,9 @@ public long Sync() return _stream.Position; } + /// + /// Synchronizes the internal. + /// private void SyncInternal() { AssertOpen(); @@ -323,6 +366,9 @@ public void Close() _isOpen = false; } + /// + /// Writes the header. + /// private void WriteHeader() { _encoder.WriteFixed(DataFileConstants.Magic); @@ -330,6 +376,9 @@ private void WriteHeader() WriteSyncData(); } + /// + /// Initializes this instance. + /// private void Init() { _blockCount = 0; @@ -344,6 +393,10 @@ private void Init() _isOpen = true; } + /// + /// Asserts the open. + /// + /// Cannot complete operation: avro file/stream not open private void AssertOpen() { if (!_isOpen) throw new AvroRuntimeException("Cannot complete operation: avro file/stream not open"); @@ -362,6 +415,9 @@ private IFileWriter Create(Schema schema, Stream outStream, Codec codec, bool return this; } + /// + /// Writes the meta data. + /// private void WriteMetaData() { // Add sync, code & schema to metadata @@ -382,17 +438,29 @@ private void WriteMetaData() _encoder.WriteMapEnd(); } + /// + /// Writes if block full. + /// private void WriteIfBlockFull() { if (BufferInUse() >= _syncInterval) WriteBlock(); } + /// + /// Buffers the in use. + /// + /// + /// Position of block stream + /// private long BufferInUse() { return _blockStream.Position; } + /// + /// Writes the block. + /// private void WriteBlock() { if (_blockCount > 0) @@ -413,11 +481,17 @@ private void WriteBlock() } } + /// + /// Writes the synchronize data. + /// private void WriteSyncData() { _encoder.WriteFixed(_syncData); } + /// + /// Generates the synchronize data. + /// private void GenerateSyncData() { _syncData = new byte[16]; @@ -426,11 +500,21 @@ private void GenerateSyncData() random.NextBytes(_syncData); } + /// + /// Sets the meta internal. + /// + /// The key. + /// The value. private void SetMetaInternal(string key, byte[] value) { _metaData.Add(key, value); } + /// + /// Gets the byte value. + /// + /// The value. + /// byte array of string value private byte[] GetByteValue(string value) { return System.Text.Encoding.UTF8.GetBytes(value); diff --git a/lang/csharp/src/apache/main/File/DeflateCodec.cs b/lang/csharp/src/apache/main/File/DeflateCodec.cs index 1a4d9a6cbc3..0ce37adb092 100644 --- a/lang/csharp/src/apache/main/File/DeflateCodec.cs +++ b/lang/csharp/src/apache/main/File/DeflateCodec.cs @@ -23,7 +23,8 @@ namespace Avro.File /// /// Implements deflate compression and decompression. /// - /// + /// + /// public class DeflateCodec : Codec { /// @@ -37,6 +38,7 @@ public override byte[] Compress(byte[] uncompressedData) { Compress.Write(uncompressedData, 0, uncompressedData.Length); } + return outStream.ToArray(); } @@ -54,27 +56,16 @@ public override void Compress(MemoryStream inputStream, MemoryStream outputStrea } /// - public override byte[] Decompress(byte[] compressedData) - { - MemoryStream inStream = new MemoryStream(compressedData); - MemoryStream outStream = new MemoryStream(); - - using (DeflateStream Decompress = - new DeflateStream(inStream, - CompressionMode.Decompress)) - { - CopyTo(Decompress, outStream); - } - return outStream.ToArray(); - } - - private static void CopyTo(Stream from, Stream to) + public override byte[] Decompress(byte[] compressedData, int length) { - byte[] buffer = new byte[4096]; - int read; - while((read = from.Read(buffer, 0, buffer.Length)) != 0) + using (MemoryStream inStream = new MemoryStream(compressedData, 0, length)) + using (MemoryStream outStream = new MemoryStream()) { - to.Write(buffer, 0, read); + using (DeflateStream decompress = new DeflateStream(inStream, CompressionMode.Decompress)) + { + decompress.CopyTo(outStream); + } + return outStream.ToArray(); } } @@ -87,9 +78,7 @@ public override string GetName() /// public override bool Equals(object other) { - if (this == other) - return true; - return this.GetType().Name == other.GetType().Name; + return this == other || GetType().Name == other.GetType().Name; } /// diff --git a/lang/csharp/src/apache/main/File/Header.cs b/lang/csharp/src/apache/main/File/Header.cs index 1ad22168ead..5cdcfd3717a 100644 --- a/lang/csharp/src/apache/main/File/Header.cs +++ b/lang/csharp/src/apache/main/File/Header.cs @@ -25,22 +25,33 @@ namespace Avro.File public class Header { /// - /// Metadata in this header. + /// Gets the metadata in this header. /// + /// + /// The metadata. + /// public IDictionary MetaData { get; } + /// - /// Sync token. + /// Gets the synchronize token. /// + /// + /// The synchronize token. + /// public byte[] SyncData { get; } + /// - /// Avro schema. + /// Gets or sets the schema. /// + /// + /// The schema. + /// public Schema Schema { get; set; } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// public Header() { diff --git a/lang/csharp/src/apache/main/File/IFileReader.cs b/lang/csharp/src/apache/main/File/IFileReader.cs index a4defd2a1e3..c482f7752da 100644 --- a/lang/csharp/src/apache/main/File/IFileReader.cs +++ b/lang/csharp/src/apache/main/File/IFileReader.cs @@ -101,7 +101,7 @@ public interface IFileReader : IDisposable /// /// Position to test. /// - /// True if pasth the next synchronization point after , false + /// True if path the next synchronization point after , false /// otherwise. /// bool PastSync(long position); diff --git a/lang/csharp/src/apache/main/File/NullCodec.cs b/lang/csharp/src/apache/main/File/NullCodec.cs index 12559411315..295d33ae3d7 100644 --- a/lang/csharp/src/apache/main/File/NullCodec.cs +++ b/lang/csharp/src/apache/main/File/NullCodec.cs @@ -27,7 +27,7 @@ namespace Avro.File public class NullCodec : Codec { /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// public NullCodec() { } @@ -45,7 +45,7 @@ public override void Compress(MemoryStream inputStream, MemoryStream outputStrea } /// - public override byte[] Decompress(byte[] compressedData) + public override byte[] Decompress(byte[] compressedData, int length) { return compressedData; } @@ -59,9 +59,7 @@ public override string GetName() /// public override bool Equals(object other) { - if (this == other) - return true; - return this.GetType().Name == other.GetType().Name; + return this == other || GetType().Name == other.GetType().Name; } /// diff --git a/lang/csharp/src/apache/main/Generic/DatumWriter.cs b/lang/csharp/src/apache/main/Generic/DatumWriter.cs index be6836537a2..0b732d9d760 100644 --- a/lang/csharp/src/apache/main/Generic/DatumWriter.cs +++ b/lang/csharp/src/apache/main/Generic/DatumWriter.cs @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + using Avro.IO; namespace Avro.Generic @@ -23,8 +24,6 @@ namespace Avro.Generic /// Defines the interface for an object that writes data of a schema. /// /// Type of the in-memory data representation. - [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", - "CA1715:Identifiers should have correct prefix", Justification = "Maintain public API")] public interface DatumWriter { /// diff --git a/lang/csharp/src/apache/main/Generic/GenericDatumReader.cs b/lang/csharp/src/apache/main/Generic/GenericDatumReader.cs index 76a95b94ead..1ec126b3aa4 100644 --- a/lang/csharp/src/apache/main/Generic/GenericDatumReader.cs +++ b/lang/csharp/src/apache/main/Generic/GenericDatumReader.cs @@ -98,16 +98,16 @@ public GenericEnumAccess(EnumSchema schema) public object CreateEnum(object reuse, int ordinal) { - if (reuse is GenericEnum) + if (reuse is GenericEnum ge) { - var ge = (GenericEnum) reuse; - if (ge.Schema.Equals(this.schema)) + if (ge.Schema.Equals(schema)) { - ge.Value = this.schema[ordinal]; + ge.Value = schema[ordinal]; return ge; } } - return new GenericEnum(this.schema, this.schema[ordinal]); + + return new GenericEnum(schema, schema[ordinal]); } } @@ -204,12 +204,12 @@ class GenericMapAccess : MapAccess { public object Create(object reuse) { - if (reuse is IDictionary) + if (reuse is IDictionary result) { - var result = (IDictionary)reuse; result.Clear(); return result; } + return new Dictionary(); } diff --git a/lang/csharp/src/apache/main/Generic/GenericEnum.cs b/lang/csharp/src/apache/main/Generic/GenericEnum.cs index 168b5552e77..27bba94ce15 100644 --- a/lang/csharp/src/apache/main/Generic/GenericEnum.cs +++ b/lang/csharp/src/apache/main/Generic/GenericEnum.cs @@ -28,29 +28,30 @@ public class GenericEnum /// public EnumSchema Schema { get; private set; } - private string value; + private string _value; /// /// Value of the enum. /// - public string Value { - get { return value; } + public string Value + { + get { return _value; } set { if (!Schema.Contains(value)) { if (!string.IsNullOrEmpty(Schema.Default)) { - this.value = Schema.Default; + _value = Schema.Default; } else { - throw new AvroException("Unknown value for enum: " + value + "(" + Schema + ")"); + throw new AvroException($"Unknown value for enum: {value}({Schema})"); } } else { - this.value = value; + _value = value; } } } @@ -62,31 +63,33 @@ public string Value { /// Value of the enum. public GenericEnum(EnumSchema schema, string value) { - this.Schema = schema; - this.Value = value; + Schema = schema; + Value = value; } /// public override bool Equals(object obj) { - if (obj == this) return true; - return (obj != null && obj is GenericEnum) - ? Value.Equals((obj as GenericEnum).Value, System.StringComparison.Ordinal) - : false; + if (obj == this) + { + return true; + } + + return obj != null + && obj.GetType() == typeof(GenericEnum) + && Value.Equals(((GenericEnum)obj).Value, System.StringComparison.Ordinal); } /// public override int GetHashCode() { -#pragma warning disable CA1307 // Specify StringComparison return 17 * Value.GetHashCode(); -#pragma warning restore CA1307 // Specify StringComparison } /// public override string ToString() { - return "Schema: " + Schema + ", value: " + Value; + return $"Schema: {Schema}, value: {Value}"; } } } diff --git a/lang/csharp/src/apache/main/Generic/GenericReader.cs b/lang/csharp/src/apache/main/Generic/GenericReader.cs index f42e572d010..0b945b9ff5e 100644 --- a/lang/csharp/src/apache/main/Generic/GenericReader.cs +++ b/lang/csharp/src/apache/main/Generic/GenericReader.cs @@ -19,6 +19,7 @@ using System.Collections.Generic; using Avro.IO; using System.IO; +using System.Linq; namespace Avro.Generic { @@ -75,7 +76,7 @@ public GenericReader(DefaultReader reader) /// Reads an object off the stream. /// /// - /// If not null, the implemenation will try to use to return the object + /// If not null, the implementation will try to use to return the object /// /// Decoder to read from. /// Object we read from the decoder. @@ -88,7 +89,7 @@ public T Read(T reuse, Decoder d) /// /// The default implementation for the generic reader. It constructs new .NET objects for avro objects on the /// stream and returns the .NET object. Users can directly use this class or, if they want to customize the - /// object types for differnt Avro schema types, can derive from this class. There are enough hooks in this + /// object types for different Avro schema types, can derive from this class. There are enough hooks in this /// class to allow customization. /// /// @@ -113,7 +114,7 @@ public class DefaultReader /// /// Constructs the default reader for the given schemas using the DefaultReader. If the /// reader's and writer's schemas are different this class performs the resolution. - /// This default implemenation maps Avro types to .NET types as follows: + /// This default implementation maps Avro types to .NET types as follows: /// /// The schema used while generating the data /// The schema desired by the reader @@ -121,6 +122,8 @@ public DefaultReader(Schema writerSchema, Schema readerSchema) { this.ReaderSchema = readerSchema; this.WriterSchema = writerSchema; + if (!ReaderSchema.CanRead(WriterSchema)) + throw new AvroException("Schema mismatch. Reader: " + ReaderSchema + ", writer: " + WriterSchema); } /// @@ -129,14 +132,11 @@ public DefaultReader(Schema writerSchema, Schema readerSchema) /// The type of object to read. A single schema typically returns an object of a single .NET class. /// The only exception is UnionSchema, which can return a object of different types based on the branch selected. /// - /// If not null, the implemenation will try to use to return the object + /// If not null, the implementation will try to use to return the object /// The decoder for deserialization /// Object read from the decoder. public T Read(T reuse, Decoder decoder) { - if (!ReaderSchema.CanRead(WriterSchema)) - throw new AvroException("Schema mismatch. Reader: " + ReaderSchema + ", writer: " + WriterSchema); - return (T)Read(reuse, WriterSchema, ReaderSchema, decoder); } @@ -144,7 +144,7 @@ public T Read(T reuse, Decoder decoder) /// Reads an object off the stream. /// /// - /// If not null, the implemenation will try to use to return the object. + /// If not null, the implementation will try to use to return the object. /// /// Schema used to write the data. /// Schema to use when reading the data. @@ -291,21 +291,21 @@ protected virtual object ReadRecord(object reuse, RecordSchema writerSchema, Sch } } - var defaultStream = new MemoryStream(); - var defaultEncoder = new BinaryEncoder(defaultStream); - var defaultDecoder = new BinaryDecoder(defaultStream); - foreach (Field rf in rs) + using (var defaultStream = new MemoryStream()) { - if (writerSchema.Contains(rf.Name)) continue; - - defaultStream.Position = 0; // reset for writing - Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); - defaultStream.Flush(); - defaultStream.Position = 0; // reset for reading - - object obj = null; - TryGetField(rec, rf.Name, rf.Pos, out obj); - AddField(rec, rf.Name, rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + var defaultEncoder = new BinaryEncoder(defaultStream); + var defaultDecoder = new BinaryDecoder(defaultStream); + foreach (Field rf in rs.Fields.Where(rf => !writerSchema.Contains(rf.Name))) + { + defaultStream.Position = 0; // reset for writing + Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); + defaultStream.Flush(); + defaultStream.Position = 0; // reset for reading + + object obj = null; + TryGetField(rec, rf.Name, rf.Pos, out obj); + AddField(rec, rf.Name, rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + } } return rec; @@ -357,7 +357,7 @@ protected virtual void AddField(object record, string fieldName, int fieldPos, o /// /// Deserializes a enum. Uses CreateEnum to construct the new enum object. /// - /// If appropirate, uses this instead of creating a new enum object. + /// If appropriate, uses this instead of creating a new enum object. /// The schema the writer used while writing the enum /// The schema the reader is using /// The decoder for deserialization. @@ -373,7 +373,7 @@ protected virtual object ReadEnum(object reuse, EnumSchema writerSchema, Schema /// If appropriate, use this enum object instead of a new one. /// The enum schema used by the reader. /// The symbol that needs to be used. - /// The default implemenation returns a GenericEnum. + /// The default implementation returns a GenericEnum. protected virtual object CreateEnum(object reuse, EnumSchema es, string symbol) { if (reuse is GenericEnum) @@ -431,7 +431,7 @@ protected virtual object CreateArray(object reuse, ArraySchema rs) /// /// Returns the size of the given array object. /// - /// Array object whose size is required. This is guaranteed to be somthing returned by + /// Array object whose size is required. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The size of the array protected virtual int GetArraySize(object array) @@ -442,7 +442,7 @@ protected virtual int GetArraySize(object array) /// /// Resizes the array to the new value. /// - /// Array object whose size is required. This is guaranteed to be somthing returned by + /// Array object whose size is required. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The new size. protected virtual void ResizeArray(ref object array, int n) @@ -455,7 +455,7 @@ protected virtual void ResizeArray(ref object array, int n) /// /// Assigns a new value to the object at the given index /// - /// Array object whose size is required. This is guaranteed to be somthing returned by + /// Array object whose size is required. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The index to reassign to. /// The value to assign. @@ -468,7 +468,7 @@ protected virtual void SetArrayElement(object array, int index, object value) /// /// Returns the element at the given index. /// - /// Array object whose size is required. This is guaranteed to be somthing returned by + /// Array object whose size is required. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The index to look into. /// The object the given index. Null if no object has been assigned to that index. @@ -478,7 +478,7 @@ protected virtual object GetArrayElement(object array, int index) } /// - /// Deserialized an avro map. The default implemenation creats a new map using CreateMap() and then + /// Deserialized an avro map. The default implementation creates a new map using CreateMap() and then /// adds elements to the map using AddMapEntry(). /// /// If appropriate, use this instead of creating a new map object. @@ -503,7 +503,7 @@ protected virtual object ReadMap(object reuse, MapSchema writerSchema, Schema re /// /// Used by the default implementation of ReadMap() to create a fresh map object. The default - /// implementaion of this method returns a IDictionary<string, map>. + /// implementation of this method returns a IDictionary<string, map>. /// /// If appropriate, use this map object instead of creating a new one. /// Map schema to use when creating the object. @@ -531,7 +531,7 @@ protected virtual void AddMapEntry(object map, string key, object value) } /// - /// Deserialized an object based on the writer's uninon schema. + /// Deserialized an object based on the writer's union schema. /// /// If appropriate, uses this object instead of creating a new one. /// The UnionSchema that the writer used. @@ -574,10 +574,10 @@ protected virtual object ReadLogical(object reuse, LogicalSchema writerSchema, S /// /// If appropriate, uses this object instead of creating a new one. /// The FixedSchema the writer used during serialization. - /// The schema that the readr uses. Must be a FixedSchema with the same + /// The schema that the reader uses. Must be a FixedSchema with the same /// size as the writerSchema. /// The decoder for deserialization. - /// The deserilized object. + /// The deserialized object. protected virtual object ReadFixed(object reuse, FixedSchema writerSchema, Schema readerSchema, Decoder d) { FixedSchema rs = (FixedSchema)readerSchema; diff --git a/lang/csharp/src/apache/main/Generic/GenericWriter.cs b/lang/csharp/src/apache/main/Generic/GenericWriter.cs index 79ff3b20867..b29cb68bfbc 100644 --- a/lang/csharp/src/apache/main/Generic/GenericWriter.cs +++ b/lang/csharp/src/apache/main/Generic/GenericWriter.cs @@ -75,7 +75,7 @@ public void Write(T value, Encoder encoder) /// A General purpose writer for serializing objects into a Stream using /// Avro. This class implements a default way of serializing objects. But /// one can derive a class from this and override different methods to - /// acheive results that are different from the default implementation. + /// achieve results that are different from the default implementation. /// public class DefaultWriter { @@ -177,6 +177,7 @@ public virtual void Write(Schema schema, object value, Encoder encoder) protected virtual void WriteNull(object value, Encoder encoder) { if (value != null) throw TypeMismatch(value, "null", "null"); + encoder.WriteNull(); } /// @@ -246,7 +247,7 @@ protected virtual object GetField(object value, string fieldName, int fieldPos) } /// - /// Serializes an enumeration. The default implementation expectes the value to be string whose + /// Serializes an enumeration. The default implementation expects the value to be string whose /// value is the name of the enumeration. /// /// The EnumSchema for serialization @@ -293,8 +294,8 @@ protected virtual void EnsureArrayObject(object value) /// /// Returns the length of an array. The default implementation requires the object - /// to be an array of objects and returns its length. The defaul implementation - /// gurantees that EnsureArrayObject() has been called on the value before this + /// to be an array of objects and returns its length. The default implementation + /// guarantees that EnsureArrayObject() has been called on the value before this /// function is called. /// /// The object whose array length is required @@ -306,8 +307,8 @@ protected virtual long GetArrayLength(object value) /// /// Returns the element at the given index from the given array object. The default implementation - /// requires that the value is an object array and returns the element in that array. The defaul implementation - /// gurantees that EnsureArrayObject() has been called on the value before this + /// requires that the value is an object array and returns the element in that array. The default implementation + /// guarantees that EnsureArrayObject() has been called on the value before this /// function is called. /// /// The array object @@ -351,7 +352,7 @@ protected virtual void EnsureMapObject(object value) } /// - /// Returns the size of the map object. The default implementation gurantees that EnsureMapObject has been + /// Returns the size of the map object. The default implementation guarantees that EnsureMapObject has been /// successfully called with the given value. The default implementation requires the value /// to be an IDictionary<string, object> and returns the number of elements in it. /// @@ -364,7 +365,7 @@ protected virtual long GetMapSize(object value) /// /// Returns the contents of the given map object. The default implementation guarantees that EnsureMapObject - /// has been called with the given value. The defualt implementation of this method requires that + /// has been called with the given value. The default implementation of this method requires that /// the value is an IDictionary<string, object> and returns its contents. /// /// The map object whose size is desired @@ -437,7 +438,7 @@ protected virtual void WriteFixed(FixedSchema es, object value, Encoder encoder) /// /// Creates a new and uses the provided parameters to build an - /// exception message indicathing there was a type mismatch. + /// exception message indicating there was a type mismatch. /// /// Object whose type does not the expected type /// Schema that we tried to write against diff --git a/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs b/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs index a4b4aa832fb..53270faecdb 100644 --- a/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs +++ b/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs @@ -198,7 +198,7 @@ private ReadItem ResolveEnum(EnumSchema writerSchema, EnumSchema readerSchema) var readerDefaultOrdinal = null != readerSchema.Default ? readerSchema.Ordinal(readerSchema.Default) : -1; foreach (var symbol in writerSchema.Symbols) - { + { var writerOrdinal = writerSchema.Ordinal(symbol); if (readerSchema.Contains(symbol)) { @@ -274,27 +274,29 @@ private ReadItem ResolveRecord(RecordSchema writerSchema, RecordSchema readerSch { if (writerSchema.Contains(rf.Name)) continue; - var defaultStream = new MemoryStream(); - var defaultEncoder = new BinaryEncoder(defaultStream); + using (var defaultStream = new MemoryStream()) + { + var defaultEncoder = new BinaryEncoder(defaultStream); - defaultStream.Position = 0; // reset for writing - Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); - defaultStream.Flush(); - var defaultBytes = defaultStream.ToArray(); + defaultStream.Position = 0; // reset for writing + Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); + defaultStream.Flush(); + var defaultBytes = defaultStream.ToArray(); - var readItem = ResolveReader(rf.Schema, rf.Schema); + var readItem = ResolveReader(rf.Schema, rf.Schema); - var rfInstance = rf; - if(IsReusable(rf.Schema.Tag)) - { - readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, - readItem(recordAccess.GetField(rec, rfInstance.Name, rfInstance.Pos), - new BinaryDecoder(new MemoryStream( defaultBytes))))); - } - else - { - readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, - readItem(null, new BinaryDecoder(new MemoryStream(defaultBytes))))); + var rfInstance = rf; + if (IsReusable(rf.Schema.Tag)) + { + readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, + readItem(recordAccess.GetField(rec, rfInstance.Name, rfInstance.Pos), + new BinaryDecoder(new MemoryStream(defaultBytes))))); + } + else + { + readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, + readItem(null, new BinaryDecoder(new MemoryStream(defaultBytes))))); + } } } @@ -319,15 +321,14 @@ private ReadItem ResolveUnion(UnionSchema writerSchema, Schema readerSchema) for (int i = 0; i < writerSchema.Count; i++) { - var writerBranch = writerSchema[i]; + Schema writerBranch = writerSchema[i]; - if (readerSchema is UnionSchema) + if (readerSchema is UnionSchema unionReader) { - var unionReader = (UnionSchema) readerSchema; - var readerBranch = unionReader.MatchingBranch(writerBranch); + int readerBranch = unionReader.MatchingBranch(writerBranch); if (readerBranch == -1) { - lookup[i] = (r, d) => { throw new AvroException( "No matching schema for " + writerBranch + " in " + unionReader ); }; + lookup[i] = (r, d) => { throw new AvroException("No matching schema for " + writerBranch + " in " + unionReader); }; } else { @@ -338,7 +339,7 @@ private ReadItem ResolveUnion(UnionSchema writerSchema, Schema readerSchema) { if (!readerSchema.CanRead(writerBranch)) { - lookup[i] = (r, d) => { throw new AvroException( "Schema mismatch Reader: " + ReaderSchema + ", writer: " + WriterSchema ); }; + lookup[i] = (r, d) => { throw new AvroException("Schema mismatch Reader: " + ReaderSchema + ", writer: " + WriterSchema); }; } else { @@ -619,7 +620,7 @@ protected interface ArrayAccess /// Hint that the array should be able to handle at least targetSize elements. The array /// is not required to be resized /// - /// Array object who needs to support targetSize elements. This is guaranteed to be somthing returned by + /// Array object who needs to support targetSize elements. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The new size. void EnsureSize(ref object array, int targetSize); @@ -627,7 +628,7 @@ protected interface ArrayAccess /// /// Resizes the array to the new value. /// - /// Array object whose size is required. This is guaranteed to be somthing returned by + /// Array object whose size is required. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The new size. void Resize(ref object array, int targetSize); diff --git a/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs b/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs index a90ac34349b..dd21f62ed80 100644 --- a/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs +++ b/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs @@ -114,6 +114,7 @@ private WriteItem ResolveWriter( Schema schema ) protected void WriteNull(object value, Encoder encoder) { if (value != null) throw TypeMismatch(value, "null", "null"); + encoder.WriteNull(); } /// @@ -332,7 +333,7 @@ protected int ResolveUnion(UnionSchema us, Schema[] branchSchemas, object obj) /// /// Creates a new and uses the provided parameters to build an - /// exception message indicathing there was a type mismatch. + /// exception message indicating there was a type mismatch. /// /// Object whose type does not the expected type /// Schema that we tried to write against @@ -383,8 +384,8 @@ protected interface ArrayAccess /// /// Returns the length of an array. The default implementation requires the object - /// to be an array of objects and returns its length. The defaul implementation - /// gurantees that EnsureArrayObject() has been called on the value before this + /// to be an array of objects and returns its length. The default implementation + /// guarantees that EnsureArrayObject() has been called on the value before this /// function is called. /// /// The object whose array length is required @@ -416,7 +417,7 @@ protected interface MapAccess void EnsureMapObject(object value); /// - /// Returns the size of the map object. The default implementation gurantees that EnsureMapObject has been + /// Returns the size of the map object. The default implementation guarantees that EnsureMapObject has been /// successfully called with the given value. The default implementation requires the value /// to be an IDictionary<string, object> and returns the number of elements in it. /// diff --git a/lang/csharp/src/apache/main/IO/BinaryDecoder.netstandard2.0.cs b/lang/csharp/src/apache/main/IO/BinaryDecoder.netstandard2.0.cs index 91afeb57e8e..a37d6fa6c84 100644 --- a/lang/csharp/src/apache/main/IO/BinaryDecoder.netstandard2.0.cs +++ b/lang/csharp/src/apache/main/IO/BinaryDecoder.netstandard2.0.cs @@ -16,6 +16,8 @@ * limitations under the License. */ using System; +using System.IO; +using System.Text; namespace Avro.IO { @@ -24,10 +26,15 @@ namespace Avro.IO /// public partial class BinaryDecoder { + /// + /// It is hard to find documentation about the real maximum array length in .NET Framework 4.6.1, but this seems to work :-/ + /// + private const int MaxDotNetArrayLength = 0x3FFFFFFF; + /// /// A float is written as 4 bytes. /// The float is converted into a 32-bit integer using a method equivalent to - /// Java's floatToIntBits and then encoded in little-endian format. + /// Java's floatToRawIntBits and then encoded in little-endian format. /// /// public float ReadFloat() @@ -49,7 +56,7 @@ public float ReadFloat() /// /// A double is written as 8 bytes. /// The double is converted into a 64-bit integer using a method equivalent to - /// Java's doubleToLongBits and then encoded in little-endian format. + /// Java's doubleToRawLongBits and then encoded in little-endian format. /// /// A double value. public double ReadDouble() @@ -72,10 +79,28 @@ public double ReadDouble() public string ReadString() { int length = ReadInt(); - byte[] buffer = new byte[length]; - //TODO: Fix this because it's lame; - ReadFixed(buffer); - return System.Text.Encoding.UTF8.GetString(buffer); + + if (length < 0) + { + throw new AvroException("Can not deserialize a string with negative length!"); + } + + if (length > MaxDotNetArrayLength) + { + throw new AvroException("String length is not supported!"); + } + + using (var binaryReader = new BinaryReader(stream, Encoding.UTF8, true)) + { + var bytes = binaryReader.ReadBytes(length); + + if (bytes.Length != length) + { + throw new AvroException("Could not read as many bytes from stream as expected!"); + } + + return Encoding.UTF8.GetString(bytes); + } } private void Read(byte[] buffer, int start, int len) diff --git a/lang/csharp/src/apache/main/IO/BinaryDecoder.notnetstandard2.0.cs b/lang/csharp/src/apache/main/IO/BinaryDecoder.notnetstandard2.0.cs index 17bd8415a96..c4a0dfaaf31 100644 --- a/lang/csharp/src/apache/main/IO/BinaryDecoder.notnetstandard2.0.cs +++ b/lang/csharp/src/apache/main/IO/BinaryDecoder.notnetstandard2.0.cs @@ -18,6 +18,7 @@ using System; using System.Buffers; using System.Buffers.Binary; +using System.IO; using System.Text; namespace Avro.IO @@ -28,11 +29,13 @@ namespace Avro.IO public partial class BinaryDecoder { private const int StackallocThreshold = 256; + private const int MaxFastReadLength = 4096; + private const int MaxDotNetArrayLength = 0x7FFFFFC7; /// /// A float is written as 4 bytes. /// The float is converted into a 32-bit integer using a method equivalent to - /// Java's floatToIntBits and then encoded in little-endian format. + /// Java's floatToRawIntBits and then encoded in little-endian format. /// /// public float ReadFloat() @@ -46,7 +49,7 @@ public float ReadFloat() /// /// A double is written as 8 bytes. /// The double is converted into a 64-bit integer using a method equivalent to - /// Java's doubleToLongBits and then encoded in little-endian format. + /// Java's doubleToRawLongBits and then encoded in little-endian format. /// /// A double value. public double ReadDouble() @@ -63,23 +66,54 @@ public double ReadDouble() /// String read from the stream. public string ReadString() { - byte[] bufferArray = null; - int length = ReadInt(); - Span buffer = length <= StackallocThreshold ? - stackalloc byte[length] : - (bufferArray = ArrayPool.Shared.Rent(length)).AsSpan(0, length); - - Read(buffer); - string result = Encoding.UTF8.GetString(buffer); + if (length < 0) + { + throw new AvroException("Can not deserialize a string with negative length!"); + } - if (bufferArray != null) + if (length <= MaxFastReadLength) { - ArrayPool.Shared.Return(bufferArray); + byte[] bufferArray = null; + + try + { + Span buffer = length <= StackallocThreshold ? + stackalloc byte[length] : + (bufferArray = ArrayPool.Shared.Rent(length)).AsSpan(0, length); + + Read(buffer); + + return Encoding.UTF8.GetString(buffer); + } + finally + { + if (bufferArray != null) + { + ArrayPool.Shared.Return(bufferArray); + } + } } + else + { + if (length > MaxDotNetArrayLength) + { + throw new AvroException("String length is not supported!"); + } - return result; + using (var binaryReader = new BinaryReader(stream, Encoding.UTF8, true)) + { + var bytes = binaryReader.ReadBytes(length); + + if (bytes.Length != length) + { + throw new AvroException("Could not read as many bytes from stream as expected!"); + } + + return Encoding.UTF8.GetString(bytes); + } + } } private void Read(byte[] buffer, int start, int len) diff --git a/lang/csharp/src/apache/main/IO/BinaryEncoder.cs b/lang/csharp/src/apache/main/IO/BinaryEncoder.cs index 30100bf31d6..91eb0e5553b 100644 --- a/lang/csharp/src/apache/main/IO/BinaryEncoder.cs +++ b/lang/csharp/src/apache/main/IO/BinaryEncoder.cs @@ -25,7 +25,7 @@ namespace Avro.IO /// public class BinaryEncoder : Encoder { - private readonly Stream Stream; + private readonly Stream stream; /// /// Initializes a new instance of the class without a backing @@ -42,7 +42,7 @@ public BinaryEncoder() : this(null) /// Stream to write to. public BinaryEncoder(Stream stream) { - this.Stream = stream; + this.stream = stream; } /// @@ -87,7 +87,7 @@ public void WriteLong(long value) /// /// A float is written as 4 bytes. /// The float is converted into a 32-bit integer using a method equivalent to - /// Java's floatToIntBits and then encoded in little-endian format. + /// Java's floatToRawIntBits and then encoded in little-endian format. /// /// public void WriteFloat(float value) @@ -99,7 +99,7 @@ public void WriteFloat(float value) /// ///A double is written as 8 bytes. ///The double is converted into a 64-bit integer using a method equivalent to - ///Java's doubleToLongBits and then encoded in little-endian format. + ///Java's doubleToRawLongBits and then encoded in little-endian format. /// /// public void WriteDouble(double value) @@ -203,22 +203,22 @@ public void WriteFixed(byte[] data) /// public void WriteFixed(byte[] data, int start, int len) { - Stream.Write(data, start, len); + stream.Write(data, start, len); } private void writeBytes(byte[] bytes) { - Stream.Write(bytes, 0, bytes.Length); + stream.Write(bytes, 0, bytes.Length); } private void writeBytes(byte[] bytes, int offset, int length) { - Stream.Write(bytes, offset, length); + stream.Write(bytes, offset, length); } private void writeByte(byte b) { - Stream.WriteByte(b); + stream.WriteByte(b); } /// @@ -226,7 +226,7 @@ private void writeByte(byte b) /// public void Flush() { - Stream.Flush(); + stream.Flush(); } } } diff --git a/lang/csharp/src/apache/main/IO/ByteBufferInputStream.cs b/lang/csharp/src/apache/main/IO/ByteBufferInputStream.cs index b077bfd3f18..278bc59b76e 100644 --- a/lang/csharp/src/apache/main/IO/ByteBufferInputStream.cs +++ b/lang/csharp/src/apache/main/IO/ByteBufferInputStream.cs @@ -83,7 +83,7 @@ private MemoryStream GetNextNonEmptyBuffer() /// Throws a . /// /// - /// Always thows. + /// Always throws. /// public override long Length { diff --git a/lang/csharp/src/apache/main/IO/Decoder.cs b/lang/csharp/src/apache/main/IO/Decoder.cs index 536c1e93956..1476b0e1a51 100644 --- a/lang/csharp/src/apache/main/IO/Decoder.cs +++ b/lang/csharp/src/apache/main/IO/Decoder.cs @@ -22,8 +22,6 @@ namespace Avro.IO /// Decoder is used to decode Avro data on a stream. There are methods to read the Avro types on the stream. There are also /// methods to skip items, which are usually more efficient than reading, on the stream. /// - [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", - "CA1715:Identifiers should have correct prefix", Justification = "Maintain public API")] public interface Decoder { /// @@ -102,7 +100,7 @@ public interface Decoder /// /// Starts reading the map Avro type. This, together with ReadMapNext() is used to read the /// entries from Avro map. This returns the number of entries in the initial chunk. After consuming - /// the chunk, the client should call ReadMapNext() to get the number of entriess in the next + /// the chunk, the client should call ReadMapNext() to get the number of entries in the next /// chunk. The client should repeat the procedure until there are no more entries in the array. /// for (int n = decoder.ReadMapStart(); n > 0; n = decoder.ReadMapNext()) /// { diff --git a/lang/csharp/src/apache/main/IO/Encoder.cs b/lang/csharp/src/apache/main/IO/Encoder.cs index 000a06eed8b..0c1712af430 100644 --- a/lang/csharp/src/apache/main/IO/Encoder.cs +++ b/lang/csharp/src/apache/main/IO/Encoder.cs @@ -19,11 +19,9 @@ namespace Avro.IO { /// - /// Defines the interface for a class that provies low-level support for serializing Avro + /// Defines the interface for a class that provides low-level support for serializing Avro /// values. /// - [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", - "CA1715:Identifiers should have correct prefix", Justification = "Maintain public API")] public interface Encoder { /// @@ -189,5 +187,10 @@ public interface Encoder /// Position within data where the contents start. /// Number of bytes to write. void WriteFixed(byte[] data, int start, int len); + + /// + /// Flushes the encoder. + /// + void Flush(); } } diff --git a/lang/csharp/src/apache/main/IO/JsonDecoder.cs b/lang/csharp/src/apache/main/IO/JsonDecoder.cs new file mode 100644 index 00000000000..549ead26d33 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/JsonDecoder.cs @@ -0,0 +1,821 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using Avro.IO.Parsing; +using Newtonsoft.Json; + +namespace Avro.IO +{ + /// + /// A for Avro's JSON data encoding. + /// + /// JsonDecoder is not thread-safe. + /// + public class JsonDecoder : ParsingDecoder + { + private JsonReader reader; + private readonly Stack reorderBuffers = new Stack(); + private ReorderBuffer currentReorderBuffer; + + private class ReorderBuffer + { + public readonly IDictionary> SavedFields = + new Dictionary>(); + + public JsonReader OrigParser { get; set; } + } + + private sealed class AvroJsonTextReader : JsonTextReader + { + public AvroJsonTextReader(TextReader reader) : base(reader) + { + this.DateParseHandling = DateParseHandling.None; + } + } + + private JsonDecoder(Symbol root, Stream stream) : base(root) + { + Configure(stream); + } + + private JsonDecoder(Symbol root, string str) : base(root) + { + Configure(str); + } + + /// + /// Initializes a new instance of the class. + /// + public JsonDecoder(Schema schema, Stream stream) : this(GetSymbol(schema), stream) + { + } + + /// + /// Initializes a new instance of the class. + /// + public JsonDecoder(Schema schema, string str) : this(GetSymbol(schema), str) + { + } + + private static Symbol GetSymbol(Schema schema) + { + return (new JsonGrammarGenerator()).Generate(schema); + } + + /// + /// Reconfigures this JsonDecoder to use the InputStream provided. + /// Otherwise, this JsonDecoder will reset its state and then reconfigure its + /// input. + /// + /// The InputStream to read from. Cannot be null. + public void Configure(Stream stream) + { + Parser.Reset(); + reorderBuffers.Clear(); + currentReorderBuffer = null; + reader = new AvroJsonTextReader(new StreamReader(stream)); + reader.Read(); + } + + /// + /// Reconfigures this JsonDecoder to use the String provided for input. + /// Otherwise, this JsonDecoder will reset its state and then reconfigure its + /// input. + /// + /// The String to read from. Cannot be null. + public void Configure(string str) + { + Parser.Reset(); + reorderBuffers.Clear(); + currentReorderBuffer = null; + reader = new AvroJsonTextReader(new StringReader(str)); + reader.Read(); + } + + private void Advance(Symbol symbol) + { + Parser.ProcessTrailingImplicitActions(); + Parser.Advance(symbol); + } + + /// + public override void ReadNull() + { + Advance(Symbol.Null); + if (reader.TokenType == JsonToken.Null) + { + reader.Read(); + } + else + { + throw TypeError("null"); + } + } + + /// + public override bool ReadBoolean() + { + Advance(Symbol.Boolean); + if (reader.TokenType == JsonToken.Boolean) + { + bool result = Convert.ToBoolean(reader.Value); + reader.Read(); + return result; + } + else + { + throw TypeError("boolean"); + } + } + + /// + public override int ReadInt() + { + Advance(Symbol.Int); + if (reader.TokenType == JsonToken.Integer || reader.TokenType == JsonToken.Float) + { + int result = Convert.ToInt32(reader.Value); + reader.Read(); + return result; + } + else + { + throw TypeError("int"); + } + } + + /// + public override long ReadLong() + { + Advance(Symbol.Long); + if (reader.TokenType == JsonToken.Integer || reader.TokenType == JsonToken.Float) + { + long result = Convert.ToInt64(reader.Value); + reader.Read(); + return result; + } + else + { + throw TypeError("long"); + } + } + + /// + public override float ReadFloat() + { + Advance(Symbol.Float); + if (reader.TokenType == JsonToken.Integer || reader.TokenType == JsonToken.Float) + { + float result = (float)Convert.ToDouble(reader.Value); + reader.Read(); + return result; + } + else if (reader.TokenType == JsonToken.String) + { + string str = Convert.ToString(reader.Value); + reader.Read(); + if (IsNaNString(str)) + { + return float.NaN; + } + else if (IsPositiveInfinityString(str)) + { + return float.PositiveInfinity; + } + else if (IsNegativeInfinityString(str)) + { + return float.NegativeInfinity; + } + } + + throw TypeError("float"); + } + + /// + public override double ReadDouble() + { + Advance(Symbol.Double); + if (reader.TokenType == JsonToken.Integer || reader.TokenType == JsonToken.Float) + { + double result = Convert.ToDouble(reader.Value); + reader.Read(); + return result; + } + else if (reader.TokenType == JsonToken.String) + { + string str = Convert.ToString(reader.Value); + reader.Read(); + if (IsNaNString(str)) + { + return double.NaN; + } + else if (IsPositiveInfinityString(str)) + { + return double.PositiveInfinity; + } + else if (IsNegativeInfinityString(str)) + { + return double.NegativeInfinity; + } + } + + throw TypeError("double"); + } + + /// + public override string ReadString() + { + Advance(Symbol.String); + if (Parser.TopSymbol() == Symbol.MapKeyMarker) + { + Parser.Advance(Symbol.MapKeyMarker); + if (reader.TokenType != JsonToken.PropertyName) + { + throw TypeError("map-key"); + } + } + else + { + if (reader.TokenType != JsonToken.String) + { + throw TypeError("string"); + } + } + + string result = Convert.ToString(reader.Value); + reader.Read(); + return result; + } + + /// + public override void SkipString() + { + Advance(Symbol.String); + if (Parser.TopSymbol() == Symbol.MapKeyMarker) + { + Parser.Advance(Symbol.MapKeyMarker); + if (reader.TokenType != JsonToken.PropertyName) + { + throw TypeError("map-key"); + } + } + else + { + if (reader.TokenType != JsonToken.String) + { + throw TypeError("string"); + } + } + + reader.Read(); + } + + /// + public override byte[] ReadBytes() + { + Advance(Symbol.Bytes); + if (reader.TokenType == JsonToken.String) + { + byte[] result = ReadByteArray(); + reader.Read(); + return result; + } + else + { + throw TypeError("bytes"); + } + } + + private byte[] ReadByteArray() + { + Encoding iso = Encoding.GetEncoding("ISO-8859-1"); + byte[] result = iso.GetBytes(Convert.ToString(reader.Value)); + return result; + } + + /// + public override void SkipBytes() + { + Advance(Symbol.Bytes); + if (reader.TokenType == JsonToken.String) + { + reader.Read(); + } + else + { + throw TypeError("bytes"); + } + } + + private void CheckFixed(int size) + { + Advance(Symbol.Fixed); + Symbol.IntCheckAction top = (Symbol.IntCheckAction)Parser.PopSymbol(); + if (size != top.Size) + { + throw new AvroTypeException("Incorrect length for fixed binary: expected " + top.Size + + " but received " + size + " bytes."); + } + } + + /// + public override void ReadFixed(byte[] bytes) + { + ReadFixed(bytes, 0, bytes.Length); + } + + /// + public override void ReadFixed(byte[] bytes, int start, int len) + { + CheckFixed(len); + if (reader.TokenType == JsonToken.String) + { + byte[] result = ReadByteArray(); + reader.Read(); + if (result.Length != len) + { + throw new AvroTypeException("Expected fixed length " + len + ", but got" + result.Length); + } + + Array.Copy(result, 0, bytes, start, len); + } + else + { + throw TypeError("fixed"); + } + } + + /// + public override void SkipFixed(int length) + { + CheckFixed(length); + DoSkipFixed(length); + } + + private void DoSkipFixed(int length) + { + if (reader.TokenType == JsonToken.String) + { + byte[] result = ReadByteArray(); + reader.Read(); + if (result.Length != length) + { + throw new AvroTypeException("Expected fixed length " + length + ", but got" + result.Length); + } + } + else + { + throw TypeError("fixed"); + } + } + + /// + protected override void SkipFixed() + { + Advance(Symbol.Fixed); + Symbol.IntCheckAction top = (Symbol.IntCheckAction)Parser.PopSymbol(); + DoSkipFixed(top.Size); + } + + /// + public override int ReadEnum() + { + Advance(Symbol.Enum); + Symbol.EnumLabelsAction top = (Symbol.EnumLabelsAction)Parser.PopSymbol(); + if (reader.TokenType == JsonToken.String) + { + string label = Convert.ToString(reader.Value); + int n = top.FindLabel(label); + if (n >= 0) + { + reader.Read(); + return n; + } + + throw new AvroTypeException("Unknown symbol in enum " + label); + } + else + { + throw TypeError("fixed"); + } + } + + /// + public override long ReadArrayStart() + { + Advance(Symbol.ArrayStart); + if (reader.TokenType == JsonToken.StartArray) + { + reader.Read(); + return DoArrayNext(); + } + else + { + throw TypeError("array-start"); + } + } + + /// + public override long ReadArrayNext() + { + Advance(Symbol.ItemEnd); + return DoArrayNext(); + } + + private long DoArrayNext() + { + if (reader.TokenType == JsonToken.EndArray) + { + Parser.Advance(Symbol.ArrayEnd); + reader.Read(); + return 0; + } + else + { + return 1; + } + } + + /// + public override void SkipArray() + { + Advance(Symbol.ArrayStart); + if (reader.TokenType == JsonToken.StartArray) + { + reader.Skip(); + reader.Read(); + Advance(Symbol.ArrayEnd); + } + else + { + throw TypeError("array-start"); + } + } + + /// + public override long ReadMapStart() + { + Advance(Symbol.MapStart); + if (reader.TokenType == JsonToken.StartObject) + { + reader.Read(); + return DoMapNext(); + } + else + { + throw TypeError("map-start"); + } + } + + /// + public override long ReadMapNext() + { + Advance(Symbol.ItemEnd); + return DoMapNext(); + } + + private long DoMapNext() + { + if (reader.TokenType == JsonToken.EndObject) + { + reader.Read(); + Advance(Symbol.MapEnd); + return 0; + } + else + { + return 1; + } + } + + /// + public override void SkipMap() + { + Advance(Symbol.MapStart); + if (reader.TokenType == JsonToken.StartObject) + { + reader.Skip(); + reader.Read(); + Advance(Symbol.MapEnd); + } + else + { + throw TypeError("map-start"); + } + } + + /// + public override int ReadUnionIndex() + { + Advance(Symbol.Union); + Symbol.Alternative a = (Symbol.Alternative)Parser.PopSymbol(); + + string label; + if (reader.TokenType == JsonToken.Null) + { + label = "null"; + } + else if (reader.TokenType == JsonToken.StartObject) + { + reader.Read(); + if (reader.TokenType == JsonToken.PropertyName) + { + label = Convert.ToString(reader.Value); + reader.Read(); + Parser.PushSymbol(Symbol.UnionEnd); + } + else + { + throw TypeError("start-union"); + } + } + else + { + throw TypeError("start-union"); + } + + int n = a.FindLabel(label); + if (n < 0) + { + throw new AvroTypeException("Unknown union branch " + label); + } + + Parser.PushSymbol(a.GetSymbol(n)); + return n; + } + + /// + public override void SkipNull() + { + ReadNull(); + } + + /// + public override void SkipBoolean() + { + ReadBoolean(); + } + + /// + public override void SkipInt() + { + ReadInt(); + } + + /// + public override void SkipLong() + { + ReadLong(); + } + + /// + public override void SkipFloat() + { + ReadFloat(); + } + + /// + public override void SkipDouble() + { + ReadDouble(); + } + + /// + public override void SkipEnum() + { + ReadEnum(); + } + + /// + public override void SkipUnionIndex() + { + ReadUnionIndex(); + } + + /// + public override Symbol DoAction(Symbol input, Symbol top) + { + if (top is Symbol.FieldAdjustAction) + { + Symbol.FieldAdjustAction fa = (Symbol.FieldAdjustAction)top; + string name = fa.FName; + if (currentReorderBuffer != null) + { + IList node = currentReorderBuffer.SavedFields[name]; + if (node != null) + { + currentReorderBuffer.SavedFields.Remove(name); + currentReorderBuffer.OrigParser = reader; + reader = MakeParser(node); + return null; + } + } + + if (reader.TokenType == JsonToken.PropertyName) + { + do + { + string fn = Convert.ToString(reader.Value); + reader.Read(); + if (name.Equals(fn) || (fa.Aliases != null && fa.Aliases.Contains(fn))) + { + return null; + } + else + { + if (currentReorderBuffer == null) + { + currentReorderBuffer = new ReorderBuffer(); + } + + currentReorderBuffer.SavedFields[fn] = GetValueAsTree(reader); + } + } while (reader.TokenType == JsonToken.PropertyName); + + throw new AvroTypeException("Expected field name not found: " + fa.FName); + } + } + else if (top == Symbol.FieldEnd) + { + if (currentReorderBuffer != null && currentReorderBuffer.OrigParser != null) + { + reader = currentReorderBuffer.OrigParser; + currentReorderBuffer.OrigParser = null; + } + } + else if (top == Symbol.RecordStart) + { + if (reader.TokenType == JsonToken.StartObject) + { + reader.Read(); + reorderBuffers.Push(currentReorderBuffer); + currentReorderBuffer = null; + } + else + { + throw TypeError("record-start"); + } + } + else if (top == Symbol.RecordEnd || top == Symbol.UnionEnd) + { + // AVRO-2034 advance to the end of our object + while (reader.TokenType != JsonToken.EndObject) + { + reader.Read(); + } + + if (top == Symbol.RecordEnd) + { + if (currentReorderBuffer != null && currentReorderBuffer.SavedFields.Count > 0) + { + throw TypeError("Unknown fields: " + currentReorderBuffer.SavedFields.Keys + .Aggregate((x, y) => x + ", " + y)); + } + + currentReorderBuffer = reorderBuffers.Pop(); + } + + // AVRO-2034 advance beyond the end object for the next record. + reader.Read(); + } + else + { + throw new AvroTypeException("Unknown action symbol " + top); + } + + return null; + } + + + private class JsonElement + { + private readonly JsonToken token; + public JsonToken Token => token; + private readonly object value; + public object Value => value; + + public JsonElement(JsonToken t, object value) + { + token = t; + this.value = value; + } + + public JsonElement(JsonToken t) : this(t, null) + { + } + } + + private static IList GetValueAsTree(JsonReader reader) + { + int level = 0; + IList result = new List(); + do + { + JsonToken t = reader.TokenType; + switch (t) + { + case JsonToken.StartObject: + case JsonToken.StartArray: + level++; + result.Add(new JsonElement(t)); + break; + case JsonToken.EndObject: + case JsonToken.EndArray: + level--; + result.Add(new JsonElement(t)); + break; + case JsonToken.PropertyName: + case JsonToken.String: + case JsonToken.Integer: + case JsonToken.Float: + case JsonToken.Boolean: + case JsonToken.Null: + result.Add(new JsonElement(t, reader.Value)); + break; + } + + reader.Read(); + } while (level != 0); + + result.Add(new JsonElement(JsonToken.None)); + return result; + } + + private JsonReader MakeParser(in IList elements) + { + return new JsonElementReader(elements); + } + + private class JsonElementReader : JsonReader + { + private readonly IList elements; + + public JsonElementReader(IList elements) + { + this.elements = elements; + this.DateParseHandling = DateParseHandling.None; + pos = 0; + } + + private int pos; + + public override object Value + { + get { return elements[pos].Value; } + } + + public override JsonToken TokenType + { + get { return elements[pos].Token; } + } + + public override bool Read() + { + pos++; + return true; + } + } + + private bool IsNaNString(string str) + { + return str.Equals("NaN", StringComparison.Ordinal); + } + + private bool IsPositiveInfinityString(string str) + { + return str.Equals("Infinity", StringComparison.Ordinal) || + str.Equals("INF", StringComparison.Ordinal); + } + + private bool IsNegativeInfinityString(string str) + { + return str.Equals("-Infinity", StringComparison.Ordinal) || + str.Equals("-INF", StringComparison.Ordinal); + } + + private AvroTypeException TypeError(string type) + { + return new AvroTypeException("Expected " + type + ". Got " + reader.TokenType); + } + } +} diff --git a/lang/csharp/src/apache/main/IO/JsonEncoder.cs b/lang/csharp/src/apache/main/IO/JsonEncoder.cs new file mode 100644 index 00000000000..c159a013e8c --- /dev/null +++ b/lang/csharp/src/apache/main/IO/JsonEncoder.cs @@ -0,0 +1,352 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Avro.IO.Parsing; +using System.Collections; +using System.IO; +using System.Text; +using Newtonsoft.Json; + +namespace Avro.IO +{ + /// + /// An for Avro's JSON data encoding. + /// + /// JsonEncoder buffers output, and data may not appear on the output until + /// is called. + /// + /// JsonEncoder is not thread-safe. + /// + public class JsonEncoder : ParsingEncoder, Parser.IActionHandler + { + private readonly Parser parser; + private JsonWriter writer; + private bool includeNamespace = true; + + // Has anything been written into the collections? + private readonly BitArray isEmpty = new BitArray(64); + + /// + /// Initializes a new instance of the class. + /// + public JsonEncoder(Schema sc, Stream stream) : this(sc, GetJsonWriter(stream, false)) + { + } + + /// + /// Initializes a new instance of the class. + /// + public JsonEncoder(Schema sc, Stream stream, bool pretty) : this(sc, GetJsonWriter(stream, pretty)) + { + } + + /// + /// Initializes a new instance of the class. + /// + public JsonEncoder(Schema sc, JsonWriter writer) + { + Configure(writer); + parser = new Parser((new JsonGrammarGenerator()).Generate(sc), this); + } + + /// + public override void Flush() + { + parser.ProcessImplicitActions(); + if (writer != null) + { + writer.Flush(); + } + } + + // by default, one object per line. + // with pretty option use default pretty printer with root line separator. + private static JsonWriter GetJsonWriter(Stream stream, bool pretty) + { + JsonWriter writer = new JsonTextWriter(new StreamWriter(stream)); + if (pretty) + { + writer.Formatting = Formatting.Indented; + } + + return writer; + } + + /// + /// Whether to include a union label when generating JSON. + /// + public virtual bool IncludeNamespace + { + get { return includeNamespace; } + set { includeNamespace = value; } + } + + + /// + /// Reconfigures this JsonEncoder to use the output stream provided. + /// Otherwise, this JsonEncoder will flush its current output and then + /// reconfigure its output to use a default UTF8 JsonWriter that writes to the + /// provided Stream. + /// + /// The Stream to direct output to. Cannot be null. + public void Configure(Stream stream) + { + Configure(GetJsonWriter(stream, false)); + } + + /// + /// Reconfigures this JsonEncoder to output to the JsonWriter provided. + /// Otherwise, this JsonEncoder will flush its current output and then + /// reconfigure its output to use the provided JsonWriter. + /// + /// The JsonWriter to direct output to. Cannot be null. + public void Configure(JsonWriter jsonWriter) + { + if (null != parser) + { + Flush(); + } + + writer = jsonWriter; + } + + /// + public override void WriteNull() + { + parser.Advance(Symbol.Null); + writer.WriteNull(); + } + + /// + public override void WriteBoolean(bool b) + { + parser.Advance(Symbol.Boolean); + writer.WriteValue(b); + } + + /// + public override void WriteInt(int n) + { + parser.Advance(Symbol.Int); + writer.WriteValue(n); + } + + /// + public override void WriteLong(long n) + { + parser.Advance(Symbol.Long); + writer.WriteValue(n); + } + + /// + public override void WriteFloat(float f) + { + parser.Advance(Symbol.Float); + writer.WriteValue(f); + } + + /// + public override void WriteDouble(double d) + { + parser.Advance(Symbol.Double); + writer.WriteValue(d); + } + + /// + public override void WriteString(string str) + { + parser.Advance(Symbol.String); + if (parser.TopSymbol() == Symbol.MapKeyMarker) + { + parser.Advance(Symbol.MapKeyMarker); + writer.WritePropertyName(str); + } + else + { + writer.WriteValue(str); + } + } + + /// + public override void WriteBytes(byte[] bytes) + { + WriteBytes(bytes, 0, bytes.Length); + } + + /// + public override void WriteBytes(byte[] bytes, int start, int len) + { + parser.Advance(Symbol.Bytes); + WriteByteArray(bytes, start, len); + } + + private void WriteByteArray(byte[] bytes, int start, int len) + { + Encoding iso = Encoding.GetEncoding("ISO-8859-1"); + writer.WriteValue(iso.GetString(bytes, start, len)); + } + + /// + public override void WriteFixed(byte[] bytes) + { + WriteFixed(bytes, 0, bytes.Length); + } + + /// + public override void WriteFixed(byte[] bytes, int start, int len) + { + parser.Advance(Symbol.Fixed); + Symbol.IntCheckAction top = (Symbol.IntCheckAction)parser.PopSymbol(); + if (len != top.Size) + { + throw new AvroTypeException("Incorrect length for fixed binary: expected " + top.Size + + " but received " + len + " bytes."); + } + + WriteByteArray(bytes, start, len); + } + + /// + public override void WriteEnum(int e) + { + parser.Advance(Symbol.Enum); + Symbol.EnumLabelsAction top = (Symbol.EnumLabelsAction)parser.PopSymbol(); + if (e < 0 || e >= top.Size) + { + throw new AvroTypeException("Enumeration out of range: max is " + top.Size + " but received " + e); + } + + writer.WriteValue(top.GetLabel(e)); + } + + /// + public override void WriteArrayStart() + { + parser.Advance(Symbol.ArrayStart); + writer.WriteStartArray(); + Push(); + if (Depth() >= isEmpty.Length) + { + isEmpty.Length += isEmpty.Length; + } + + isEmpty.Set(Depth(), true); + } + + /// + public override void WriteArrayEnd() + { + if (!isEmpty.Get(Pos)) + { + parser.Advance(Symbol.ItemEnd); + } + + Pop(); + parser.Advance(Symbol.ArrayEnd); + writer.WriteEndArray(); + } + + /// + public override void WriteMapStart() + { + Push(); + if (Depth() >= isEmpty.Length) + { + isEmpty.Length += isEmpty.Length; + } + + isEmpty.Set(Depth(), true); + + parser.Advance(Symbol.MapStart); + writer.WriteStartObject(); + } + + /// + public override void WriteMapEnd() + { + if (!isEmpty.Get(Pos)) + { + parser.Advance(Symbol.ItemEnd); + } + + Pop(); + + parser.Advance(Symbol.MapEnd); + writer.WriteEndObject(); + } + + /// + public override void StartItem() + { + if (!isEmpty.Get(Pos)) + { + parser.Advance(Symbol.ItemEnd); + } + + base.StartItem(); + if (Depth() >= isEmpty.Length) + { + isEmpty.Length += isEmpty.Length; + } + + isEmpty.Set(Depth(), false); + } + + /// + public override void WriteUnionIndex(int unionIndex) + { + parser.Advance(Symbol.Union); + Symbol.Alternative top = (Symbol.Alternative)parser.PopSymbol(); + Symbol symbol = top.GetSymbol(unionIndex); + if (symbol != Symbol.Null && includeNamespace) + { + writer.WriteStartObject(); + writer.WritePropertyName(top.GetLabel(unionIndex)); + parser.PushSymbol(Symbol.UnionEnd); + } + + parser.PushSymbol(symbol); + } + + /// + /// Perform an action based on the given input. + /// + public virtual Symbol DoAction(Symbol input, Symbol top) + { + if (top is Symbol.FieldAdjustAction) + { + Symbol.FieldAdjustAction fa = (Symbol.FieldAdjustAction)top; + writer.WritePropertyName(fa.FName); + } + else if (top == Symbol.RecordStart) + { + writer.WriteStartObject(); + } + else if (top == Symbol.RecordEnd || top == Symbol.UnionEnd) + { + writer.WriteEndObject(); + } + else if (top != Symbol.FieldEnd) + { + throw new AvroTypeException("Unknown action symbol " + top); + } + + return null; + } + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs new file mode 100644 index 00000000000..508ea264b83 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs @@ -0,0 +1,105 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Avro.IO.Parsing +{ + /// + /// The class that generates a grammar suitable to parse Avro data in JSON + /// format. + /// + public class JsonGrammarGenerator : ValidatingGrammarGenerator + { + /// + /// Returns the non-terminal that is the start symbol for the grammar for the + /// grammar for the given schema schema. + /// + public override Symbol Generate(Schema schema) + { + return Symbol.NewRoot(Generate(schema, new Dictionary())); + } + + /// + /// Returns the non-terminal that is the start symbol for grammar of the given + /// schema sc. If there is already an entry for the given schema in the + /// given map seen then that entry is returned. Otherwise a new symbol + /// is generated and an entry is inserted into the map. + /// + /// The schema for which the start symbol is required + /// A map of schema to symbol mapping done so far. + /// The start symbol for the schema + protected override Symbol Generate(Schema sc, IDictionary seen) + { + switch (sc.Tag) + { + case Schema.Type.Null: + case Schema.Type.Boolean: + case Schema.Type.Int: + case Schema.Type.Long: + case Schema.Type.Float: + case Schema.Type.Double: + case Schema.Type.String: + case Schema.Type.Bytes: + case Schema.Type.Fixed: + case Schema.Type.Union: + return base.Generate(sc, seen); + case Schema.Type.Enumeration: + return Symbol.NewSeq(new Symbol.EnumLabelsAction(((EnumSchema)sc).Symbols), Symbol.Enum); + case Schema.Type.Array: + return Symbol.NewSeq( + Symbol.NewRepeat(Symbol.ArrayEnd, Symbol.ItemEnd, Generate(((ArraySchema)sc).ItemSchema, seen)), + Symbol.ArrayStart); + case Schema.Type.Map: + return Symbol.NewSeq( + Symbol.NewRepeat(Symbol.MapEnd, Symbol.ItemEnd, Generate(((MapSchema)sc).ValueSchema, seen), + Symbol.MapKeyMarker, Symbol.String), Symbol.MapStart); + case Schema.Type.Record: + { + LitS wsc = new LitS(sc); + if (!seen.TryGetValue(wsc, out Symbol rresult)) + { + Symbol[] production = new Symbol[((RecordSchema)sc).Fields.Count * 3 + 2]; + rresult = Symbol.NewSeq(production); + seen[wsc] = rresult; + + int i = production.Length; + int n = 0; + production[--i] = Symbol.RecordStart; + foreach (Field f in ((RecordSchema)sc).Fields) + { + production[--i] = new Symbol.FieldAdjustAction(n, f.Name, f.Aliases); + production[--i] = Generate(f.Schema, seen); + production[--i] = Symbol.FieldEnd; + n++; + } + + production[i - 1] = Symbol.RecordEnd; + } + + return rresult; + } + case Schema.Type.Logical: + return Generate((sc as LogicalSchema).BaseSchema, seen); + default: + throw new Exception("Unexpected schema type"); + } + } + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/Parser.cs b/lang/csharp/src/apache/main/IO/Parsing/Parser.cs new file mode 100644 index 00000000000..ae788ede05d --- /dev/null +++ b/lang/csharp/src/apache/main/IO/Parsing/Parser.cs @@ -0,0 +1,229 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Avro.IO.Parsing +{ + /// + /// Parser is the class that maintains the stack for parsing. This class is used + /// by encoders, which are not required to skip. + /// + public class Parser + { + /// + /// The parser knows how to handle the terminal and non-terminal symbols. But it + /// needs help from outside to handle implicit and explicit actions. The clients + /// implement this interface to provide this help. + /// + public interface IActionHandler + { + /// + /// Handle the action symbol top when the input is sought to be + /// taken off the stack. + /// + /// The input symbol from the caller of Advance + /// The symbol at the top the stack. + /// null if Advance() is to continue processing the stack. If + /// not null the return value will be returned by Advance(). + Symbol DoAction(Symbol input, Symbol top); + } + + private readonly IActionHandler symbolHandler; + /// + /// Stack of symbols. + /// + protected Symbol[] Stack; + /// + /// Position of the stack. + /// + protected int Pos; + + /// + /// Initializes a new instance of the class. + /// + public Parser(Symbol root, IActionHandler symbolHandler) + { + this.symbolHandler = symbolHandler; + Stack = new Symbol[5]; // Start small to make sure expansion code works + Stack[0] = root; + Pos = 1; + } + + /// + /// If there is no sufficient room in the stack, use this expand it. + /// + private void ExpandStack() + { + Array.Resize(ref Stack, Stack.Length + Math.Max(Stack.Length, 1024)); + } + + /// + /// Recursively replaces the symbol at the top of the stack with its production, + /// until the top is a terminal. Then checks if the top symbol matches the + /// terminal symbol supplied input. + /// + /// The symbol to match against the terminal at the top of the + /// stack. + /// The terminal symbol at the top of the stack unless an implicit action + /// resulted in another symbol, in which case that symbol is returned. + public Symbol Advance(Symbol input) + { + for (;;) + { + Symbol top = Stack[--Pos]; + if (top == input) + { + return top; // A common case + } + + Symbol.Kind k = top.SymKind; + if (k == Symbol.Kind.ImplicitAction) + { + Symbol result = symbolHandler.DoAction(input, top); + if (result != null) + { + return result; + } + } + else if (k == Symbol.Kind.Terminal) + { + throw new AvroTypeException("Attempt to process a " + input + " when a " + top + " was expected."); + } + else if (k == Symbol.Kind.Repeater && input == ((Symbol.Repeater)top).End) + { + return input; + } + else + { + PushProduction(top); + } + } + } + + /// + /// Performs any implicit actions at the top the stack, expanding any production + /// (other than the root) that may be encountered. This method will fail if there + /// are any repeaters on the stack. + /// + public void ProcessImplicitActions() + { + while (Pos > 1) + { + Symbol top = Stack[Pos - 1]; + if (top.SymKind == Symbol.Kind.ImplicitAction) + { + Pos--; + symbolHandler.DoAction(null, top); + } + else if (top.SymKind != Symbol.Kind.Terminal) + { + Pos--; + PushProduction(top); + } + else + { + break; + } + } + } + + /// + /// Performs any "trailing" implicit actions at the top the stack. + /// + public void ProcessTrailingImplicitActions() + { + while (Pos >= 1) + { + Symbol top = Stack[Pos - 1]; + if (top.SymKind == Symbol.Kind.ImplicitAction && ((Symbol.ImplicitAction)top).IsTrailing) + { + Pos--; + symbolHandler.DoAction(null, top); + } + else + { + break; + } + } + } + + /// + /// Pushes the production for the given symbol sym. If sym is a + /// repeater and input is either or + /// pushes nothing. + /// + /// + public void PushProduction(Symbol sym) + { + Symbol[] p = sym.Production; + while (Pos + p.Length > Stack.Length) + { + ExpandStack(); + } + + Array.Copy(p, 0, Stack, Pos, p.Length); + Pos += p.Length; + } + + /// + /// Pops and returns the top symbol from the stack. + /// + public virtual Symbol PopSymbol() + { + return Stack[--Pos]; + } + + /// + /// Returns the top symbol from the stack. + /// + public virtual Symbol TopSymbol() + { + return Stack[Pos - 1]; + } + + /// + /// Pushes sym on to the stack. + /// + public virtual void PushSymbol(Symbol sym) + { + if (Pos == Stack.Length) + { + ExpandStack(); + } + + Stack[Pos++] = sym; + } + + /// + /// Returns the depth of the stack. + /// + public virtual int Depth() + { + return Pos; + } + + /// + /// Resets the stack. + /// + public virtual void Reset() + { + Pos = 1; + } + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/SkipParser.cs b/lang/csharp/src/apache/main/IO/Parsing/SkipParser.cs new file mode 100644 index 00000000000..4679215cbc2 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/Parsing/SkipParser.cs @@ -0,0 +1,107 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Diagnostics; + +namespace Avro.IO.Parsing +{ + /// + /// A parser that capable of skipping as well read and write. This class is used + /// by decoders who (unlike encoders) are required to implement methods to skip. + /// + public class SkipParser : Parser + { + /// + /// The clients implement this interface to skip symbols and actions. + /// + public interface ISkipHandler + { + /// + /// Skips the action at the top of the stack. + /// + void SkipAction(); + + /// + /// Skips the symbol at the top of the stack. + /// + void SkipTopSymbol(); + } + + private readonly ISkipHandler skipHandler; + + /// + /// Initializes a new instance of the class. + /// + public SkipParser(Symbol root, IActionHandler symbolHandler, ISkipHandler skipHandler) : base(root, symbolHandler) + { + this.skipHandler = skipHandler; + } + + /// + /// Skips data by calling skipXyz or readXyz methods on + /// this, until the parser stack reaches the target level. + /// + public void SkipTo(int target) + { + while (target < Pos) + { + Symbol top = Stack[Pos - 1]; + while (top.SymKind != Symbol.Kind.Terminal) + { + if (top.SymKind == Symbol.Kind.ImplicitAction || top.SymKind == Symbol.Kind.ExplicitAction) + { + skipHandler.SkipAction(); + } + else + { + --Pos; + PushProduction(top); + } + + goto outerContinue; + } + + skipHandler.SkipTopSymbol(); + outerContinue: ; + } + } + + /// + /// Skips the repeater at the top the stack. + /// + public void SkipRepeater() + { + int target = Pos; + Symbol repeater = Stack[--Pos]; + Debug.Assert(repeater.SymKind == Symbol.Kind.Repeater); + PushProduction(repeater); + SkipTo(target); + } + + /// + /// Pushes the given symbol on to the skip and skips it. + /// + /// The symbol that should be skipped. + public void SkipSymbol(Symbol symToSkip) + { + int target = Pos; + PushSymbol(symToSkip); + SkipTo(target); + } + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs b/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs new file mode 100644 index 00000000000..d5f4ee09c43 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs @@ -0,0 +1,984 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; + +namespace Avro.IO.Parsing +{ + /// + /// Symbol is the base of all symbols (terminals and non-terminals) of the + /// grammar. + /// + public abstract class Symbol + { + /// + /// The type of symbol. + /// + public enum Kind + { + /// + /// terminal symbols which have no productions + Terminal, + + /// + /// Start symbol for some grammar + Root, + + /// + /// non-terminal symbol which is a sequence of one or more other symbols + Sequence, + + /// + /// non-terminal to represent the contents of an array or map + Repeater, + + /// + /// non-terminal to represent the union + Alternative, + + /// + /// non-terminal action symbol which are automatically consumed + ImplicitAction, + + /// + /// non-terminal action symbol which is explicitly consumed + ExplicitAction + } + + /// The kind of this symbol. + public Kind SymKind { get; private set; } + + /// + /// The production for this symbol. If this symbol is a terminal this is + /// null. Otherwise this holds the the sequence of the symbols that + /// forms the production for this symbol. The sequence is in the reverse order of + /// production. This is useful for easy copying onto parsing stack. + /// + /// Please note that this is a final. So the production for a symbol should be + /// known before that symbol is constructed. This requirement cannot be met for + /// those symbols which are recursive (e.g. a record that holds union a branch of + /// which is the record itself). To resolve this problem, we initialize the + /// symbol with an array of nulls. Later we fill the symbols. Not clean, but + /// works. The other option is to not have this field a final. But keeping it + /// final and thus keeping symbol immutable gives some comfort. See various + /// generators how we generate records. + /// + public Symbol[] Production { get; private set; } + + /// + /// Constructs a new symbol of the given kind. + /// + protected Symbol(Kind kind) : this(kind, null) + { + } + + /// + /// Constructs a new symbol of the given kind and production. + /// + protected Symbol(Kind kind, Symbol[] production) + { + Production = production; + SymKind = kind; + } + + /// + /// A convenience method to construct a root symbol. + /// + public static Symbol NewRoot(params Symbol[] symbols) => new Root(symbols); + + /// + /// A convenience method to construct a sequence. + /// + /// The constituent symbols of the sequence. + public static Symbol NewSeq(params Symbol[] production) => new Sequence(production); + + /// + /// A convenience method to construct a repeater. + /// + /// The end symbol. + /// The symbols to repeat in the repeater. + public static Symbol NewRepeat(Symbol endSymbol, params Symbol[] symsToRepeat) => + new Repeater(endSymbol, symsToRepeat); + + /// + /// A convenience method to construct a union. + /// + public static Symbol NewAlt(Symbol[] symbols, string[] labels) => new Alternative(symbols, labels); + + /// + /// A convenience method to construct an ErrorAction. + /// + /// + protected static Symbol Error(string e) => new ErrorAction(e); + + /// + /// A convenience method to construct a ResolvingAction. + /// + /// The writer symbol + /// The reader symbol + protected static Symbol Resolve(Symbol w, Symbol r) => new ResolvingAction(w, r); + + /// + /// Fixup symbol. + /// + protected class Fixup + { + private readonly Symbol[] symbols; + + /// + /// The symbols. + /// + public Symbol[] Symbols + { + get { return (Symbol[])symbols.Clone(); } + } + + /// + /// The position. + /// + public int Pos { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public Fixup(Symbol[] symbols, int pos) + { + this.symbols = (Symbol[])symbols.Clone(); + Pos = pos; + } + } + + /// + /// Flatten the given sub-array of symbols into a sub-array of symbols. + /// + protected virtual Symbol Flatten(IDictionary map, IDictionary> map2) => this; + + /// + /// Returns the flattened size. + /// + public virtual int FlattenedSize() => 1; + + /// + /// Flattens the given sub-array of symbols into an sub-array of symbols. Every + /// Sequence in the input are replaced by its production recursively. + /// Non-Sequence symbols, they internally have other symbols those + /// internal symbols also get flattened. When flattening is done, the only place + /// there might be Sequence symbols is in the productions of a Repeater, + /// Alternative, or the symToParse and symToSkip in a UnionAdjustAction or + /// SkipAction. + /// + /// Why is this done? We want our parsers to be fast. If we left the grammars + /// unflattened, then the parser would be constantly copying the contents of + /// nested Sequence productions onto the parsing stack. Instead, because of + /// flattening, we have a long top-level production with no Sequences unless the + /// Sequence is absolutely needed, e.g., in the case of a Repeater or an + /// Alternative. + /// + /// Well, this is not exactly true when recursion is involved. Where there is a + /// recursive record, that record will be "inlined" once, but any internal (ie, + /// recursive) references to that record will be a Sequence for the record. That + /// Sequence will not further inline itself -- it will refer to itself as a + /// Sequence. The same is true for any records nested in this outer recursive + /// record. Recursion is rare, and we want things to be fast in the typical case, + /// which is why we do the flattening optimization. + /// + /// + /// The algorithm does a few tricks to handle recursive symbol definitions. In + /// order to avoid infinite recursion with recursive symbols, we have a map of + /// Symbol->Symbol. Before fully constructing a flattened symbol for a + /// Sequence we insert an empty output symbol into the map and then + /// start filling the production for the Sequence. If the same + /// Sequence is encountered due to recursion, we simply return the + /// (empty) output Sequence from the map. Then we actually fill out + /// the production for the Sequence. As part of the flattening process + /// we copy the production of Sequences into larger arrays. If the + /// original Sequence has not not be fully constructed yet, we copy a + /// bunch of nulls. Fix-up remembers all those null patches. + /// The fix-ups gets finally filled when we know the symbols to occupy those + /// patches. + /// + /// The array of input symbols to flatten + /// The position where the input sub-array starts. + /// The output that receives the flattened list of symbols. The + /// output array should have sufficient space to receive the + /// expanded sub-array of symbols. + /// The position where the output input sub-array starts. + /// A map of symbols which have already been expanded. Useful for + /// handling recursive definitions and for caching. + /// A map to to store the list of fix-ups. + protected static void Flatten(Symbol[] input, int start, Symbol[] output, int skip, + IDictionary map, IDictionary> map2) + { + for (int i = start, j = skip; i < input.Length; i++) + { + Symbol s = input[i].Flatten(map, map2); + if (s is Sequence) + { + Symbol[] p = s.Production; + if (!map2.TryGetValue((Sequence)s, out IList l)) + { + Array.Copy(p, 0, output, j, p.Length); + // Copy any fixups that will be applied to p to add missing symbols + foreach (IList fixups in map2.Values) + { + CopyFixups(fixups, output, j, p); + } + } + else + { + l.Add(new Fixup(output, j)); + } + + j += p.Length; + } + else + { + output[j++] = s; + } + } + } + + private static void CopyFixups(IList fixups, Symbol[] output, int outPos, Symbol[] toCopy) + { + for (int i = 0, n = fixups.Count; i < n; i += 1) + { + Fixup fixup = fixups[i]; + if (fixup.Symbols == toCopy) + { + fixups.Add(new Fixup(output, fixup.Pos + outPos)); + } + } + } + + /// + /// Returns the amount of space required to flatten the given sub-array of + /// symbols. + /// + /// The array of input symbols. + /// The index where the subarray starts. + /// The number of symbols that will be produced if one expands the given + /// input. + protected static int FlattenedSize(Symbol[] symbols, int start) + { + int result = 0; + for (int i = start; i < symbols.Length; i++) + { + if (symbols[i] is Sequence) + { + Sequence s = (Sequence)symbols[i]; + result += s.FlattenedSize(); + } + else + { + result += 1; + } + } + + return result; + } + + /// + /// Terminal symbol. + /// + protected class Terminal : Symbol + { + /// + /// Printable name. + /// + public string PrintName { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public Terminal(string printName) : base(Kind.Terminal) + { + PrintName = printName; + } + + /// + public override string ToString() => PrintName; + } + + /// + /// Implicit action. + /// + public class ImplicitAction : Symbol + { + /// + /// Set to true if and only if this implicit action is a trailing + /// action. That is, it is an action that follows real symbol. E.g + /// . + /// + public bool IsTrailing { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public ImplicitAction() : this(false) + { + } + + /// + /// Initializes a new instance of the class. + /// + public ImplicitAction(bool isTrailing) : base(Kind.ImplicitAction) + { + IsTrailing = isTrailing; + } + } + + /// + /// Root symbol. + /// + protected class Root : Symbol + { + /// + /// Initializes a new instance of the class. + /// + public Root(params Symbol[] symbols) : base(Kind.Root, MakeProduction(symbols)) + { + Production[0] = this; + } + + private static Symbol[] MakeProduction(Symbol[] symbols) + { + Symbol[] result = new Symbol[FlattenedSize(symbols, 0) + 1]; + Flatten(symbols, 0, result, 1, new Dictionary(), + new Dictionary>()); + return result; + } + } + + /// + /// Sequence symbol. + /// + protected class Sequence : Symbol, IEnumerable + { + /// + /// Initializes a new instance of the class. + /// + public Sequence(Symbol[] productions) : base(Kind.Sequence, productions) + { + } + + /// + /// Get the symbol at the given index. + /// + public virtual Symbol this[int index] => Production[index]; + + /// + /// Get the symbol at the given index. + /// + public virtual Symbol Get(int index) => Production[index]; + + /// + /// Returns the number of symbols. + /// + public virtual int Size() => Production.Length; + + /// + public IEnumerator GetEnumerator() => Enumerable.Reverse(Production).GetEnumerator(); + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + if (!map.TryGetValue(this, out Sequence result)) + { + result = new Sequence(new Symbol[FlattenedSize()]); + map[this] = result; + IList l = new List(); + map2[result] = l; + + Flatten(Production, 0, result.Production, 0, map, map2); + foreach (Fixup f in l) + { + Array.Copy(result.Production, 0, f.Symbols, f.Pos, result.Production.Length); + } + + map2.Remove(result); + } + + return result; + } + + /// + public override int FlattenedSize() => FlattenedSize(Production, 0); + } + + /// + /// Repeater symbol. + /// + public class Repeater : Symbol + { + /// + /// The end symbol. + /// + public Symbol End { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public Repeater(Symbol end, params Symbol[] sequenceToRepeat) : base(Kind.Repeater, + MakeProduction(sequenceToRepeat)) + { + End = end; + Production[0] = this; + } + + private static Symbol[] MakeProduction(Symbol[] p) + { + Symbol[] result = new Symbol[p.Length + 1]; + Array.Copy(p, 0, result, 1, p.Length); + return result; + } + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + Repeater result = new Repeater(End, new Symbol[FlattenedSize(Production, 1)]); + Flatten(Production, 1, result.Production, 1, map, map2); + return result; + } + } + + /// + /// Returns true if the Parser contains any Error symbol, indicating that it may + /// fail for some inputs. + /// + private static bool HasErrors(Symbol symbol) + { + return HasErrors(symbol, new HashSet()); + } + + private static bool HasErrors(Symbol symbol, ISet visited) + { + // avoid infinite recursion + if (visited.Contains(symbol)) + { + return false; + } + + visited.Add(symbol); + + switch (symbol.SymKind) + { + case Kind.Alternative: + return HasErrors(symbol, ((Alternative)symbol).Symbols, visited); + case Kind.ExplicitAction: + return false; + case Kind.ImplicitAction: + if (symbol is ErrorAction) + { + return true; + } + + if (symbol is UnionAdjustAction) + { + return HasErrors(((UnionAdjustAction)symbol).SymToParse, visited); + } + + return false; + case Kind.Repeater: + Repeater r = (Repeater)symbol; + return HasErrors(r.End, visited) || HasErrors(symbol, r.Production, visited); + case Kind.Root: + case Kind.Sequence: + return HasErrors(symbol, symbol.Production, visited); + case Kind.Terminal: + return false; + default: + throw new Exception("unknown symbol kind: " + symbol.SymKind); + } + } + + private static bool HasErrors(Symbol root, Symbol[] symbols, ISet visited) + { + if (null != symbols) + { + foreach (Symbol s in symbols) + { + if (s == root) + { + continue; + } + + if (HasErrors(s, visited)) + { + return true; + } + } + } + + return false; + } + + /// + /// Alternative symbol. + /// + public class Alternative : Symbol + { + /// + /// The symbols. + /// + public Symbol[] Symbols { get; private set; } + + /// + /// The labels. + /// + public string[] Labels { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public Alternative(Symbol[] symbols, string[] labels) : base(Kind.Alternative) + { + Symbols = symbols; + Labels = labels; + } + + /// + /// Returns the symbol at the given index. + /// + public virtual Symbol GetSymbol(int index) + { + return Symbols[index]; + } + + /// + /// Returns the label at the given index. + /// + public virtual string GetLabel(int index) + { + return Labels[index]; + } + + /// + /// Returns the size. + /// + public virtual int Size() + { + return Symbols.Length; + } + + /// + /// Returns the index of the given label. + /// + public virtual int FindLabel(string label) + { + if (label != null) + { + for (int i = 0; i < Labels.Length; i++) + { + if (label.Equals(Labels[i])) + { + return i; + } + } + } + + return -1; + } + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + Symbol[] ss = new Symbol[Symbols.Length]; + for (int i = 0; i < ss.Length; i++) + { + ss[i] = Symbols[i].Flatten(map, map2); + } + + return new Alternative(ss, Labels); + } + } + + /// + /// The error action. + /// + public class ErrorAction : ImplicitAction + { + /// + /// The error message. + /// + public string Msg { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public ErrorAction(string msg) + { + Msg = msg; + } + } + + /// + /// Int check action. + /// + public class IntCheckAction : Symbol + { + /// + /// The size. + /// + public int Size { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public IntCheckAction(int size) : base(Kind.ExplicitAction) + { + Size = size; + } + } + + /// + /// The writer union action. + /// + public class WriterUnionAction : ImplicitAction + { + } + + /// + /// The resolving action. + /// + public class ResolvingAction : ImplicitAction + { + /// + /// The writer. + /// + public Symbol Writer { get; private set; } + + /// + /// The reader. + /// + public Symbol Reader { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public ResolvingAction(Symbol writer, Symbol reader) + { + Writer = writer; + Reader = reader; + } + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + return new ResolvingAction(Writer.Flatten(map, map2), Reader.Flatten(map, map2)); + } + } + + /// + /// The skip action. + /// + public class SkipAction : ImplicitAction + { + /// + /// The symbol to skip. + /// + public Symbol SymToSkip { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public SkipAction(Symbol symToSkip) : base(true) + { + SymToSkip = symToSkip; + } + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + return new SkipAction(SymToSkip.Flatten(map, map2)); + } + } + + /// + /// The field adjust action. + /// + public class FieldAdjustAction : ImplicitAction + { + /// + /// The index. + /// + public int RIndex { get; private set; } + + /// + /// The field name. + /// + public string FName { get; private set; } + + /// + /// The field aliases. + /// + public IList Aliases { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public FieldAdjustAction(int rindex, string fname, IList aliases) + { + RIndex = rindex; + FName = fname; + Aliases = aliases; + } + } + + /// + /// THe field order action. + /// + public sealed class FieldOrderAction : ImplicitAction + { + /// + /// Whether no reorder is needed. + /// + public bool NoReorder { get; private set; } + + /// + /// The fields. + /// + public Field[] Fields { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public FieldOrderAction(Field[] fields) + { + Fields = fields; + bool noReorder = true; + for (int i = 0; noReorder && i < fields.Length; i++) + { + noReorder &= (i == fields[i].Pos); + } + + NoReorder = noReorder; + } + } + + /// + /// The default start action. + /// + public class DefaultStartAction : ImplicitAction + { + /// + /// The contents. + /// + public byte[] Contents { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public DefaultStartAction(byte[] contents) + { + Contents = contents; + } + } + + /// + /// The union adjust action. + /// + public class UnionAdjustAction : ImplicitAction + { + /// + /// The index. + /// + public int RIndex { get; private set; } + + /// + /// The symbol to parser. + /// + public Symbol SymToParse { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public UnionAdjustAction(int rindex, Symbol symToParse) + { + RIndex = rindex; + SymToParse = symToParse; + } + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + return new UnionAdjustAction(RIndex, SymToParse.Flatten(map, map2)); + } + } + + /// + /// The enum labels action. + /// + public class EnumLabelsAction : IntCheckAction + { + /// + /// The symbols. + /// + public IList Symbols { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public EnumLabelsAction(IList symbols) : base(symbols.Count) + { + Symbols = symbols; + } + + /// + /// Returns the label at the given index. + /// + public virtual string GetLabel(int n) + { + return Symbols[n]; + } + + /// + /// Returns index of the given label. + /// + public virtual int FindLabel(string label) + { + if (label != null) + { + for (int i = 0; i < Symbols.Count; i++) + { + if (label.Equals(Symbols[i])) + { + return i; + } + } + } + + return -1; + } + } + + /// + /// The terminal symbols for the grammar. + /// + public static Symbol Null { get; } = new Terminal("null"); + + /// + /// Boolean + /// + public static Symbol Boolean { get; } = new Terminal("boolean"); + + /// + /// Int + /// + public static Symbol Int { get; } = new Terminal("int"); + /// + /// Long + /// + public static Symbol Long { get; } = new Terminal("long"); + /// + /// Float + /// + public static Symbol Float { get; } = new Terminal("float"); + /// + /// Double + /// + public static Symbol Double { get; } = new Terminal("double"); + /// + /// String + /// + public static Symbol String { get; } = new Terminal("string"); + /// + /// Bytes + /// + public static Symbol Bytes { get; } = new Terminal("bytes"); + /// + /// Fixed + /// + public static Symbol Fixed { get; } = new Terminal("fixed"); + /// + /// Enum + /// + public static Symbol Enum { get; } = new Terminal("enum"); + /// + /// Union + /// + public static Symbol Union { get; } = new Terminal("union"); + + /// + /// ArrayStart + /// + public static Symbol ArrayStart { get; } = new Terminal("array-start"); + /// + /// ArrayEnd + /// + public static Symbol ArrayEnd { get; } = new Terminal("array-end"); + /// + /// MapStart + /// + public static Symbol MapStart { get; } = new Terminal("map-start"); + /// + /// MapEnd + /// + public static Symbol MapEnd { get; } = new Terminal("map-end"); + /// + /// ItemEnd + /// + public static Symbol ItemEnd { get; } = new Terminal("item-end"); + + /// + /// WriterUnion + /// + public static Symbol WriterUnion { get; } = new WriterUnionAction(); + + /// + /// FieldAction - a pseudo terminal used by parsers + /// + public static Symbol FieldAction { get; } = new Terminal("field-action"); + + /// + /// RecordStart + /// + public static Symbol RecordStart { get; } = new ImplicitAction(false); + /// + /// RecordEnd + /// + public static Symbol RecordEnd { get; } = new ImplicitAction(true); + /// + /// UnionEnd + /// + public static Symbol UnionEnd { get; } = new ImplicitAction(true); + /// + /// FieldEnd + /// + public static Symbol FieldEnd { get; } = new ImplicitAction(true); + + /// + /// DefaultEndAction + /// + public static Symbol DefaultEndAction { get; } = new ImplicitAction(true); + /// + /// MapKeyMarker + /// + public static Symbol MapKeyMarker { get; } = new Terminal("map-key-marker"); + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs b/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs new file mode 100644 index 00000000000..7d109660671 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs @@ -0,0 +1,170 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Avro.Generic; + +namespace Avro.IO.Parsing +{ + /// + /// The class that generates validating grammar. + /// + public class ValidatingGrammarGenerator + { + /// + /// Returns the non-terminal that is the start symbol for the grammar for the + /// given schema sc. + /// + public virtual Symbol Generate(Schema schema) + { + return Symbol.NewRoot(Generate(schema, new Dictionary())); + } + + /// + /// Returns the non-terminal that is the start symbol for the grammar for the + /// given schema sc. If there is already an entry for the given schema + /// in the given map seen then that entry is returned. Otherwise a new + /// symbol is generated and an entry is inserted into the map. + /// + /// The schema for which the start symbol is required + /// A map of schema to symbol mapping done so far. + /// The start symbol for the schema + protected virtual Symbol Generate(Schema sc, IDictionary seen) + { + switch (sc.Tag) + { + case Schema.Type.Null: + return Symbol.Null; + case Schema.Type.Boolean: + return Symbol.Boolean; + case Schema.Type.Int: + return Symbol.Int; + case Schema.Type.Long: + return Symbol.Long; + case Schema.Type.Float: + return Symbol.Float; + case Schema.Type.Double: + return Symbol.Double; + case Schema.Type.String: + return Symbol.String; + case Schema.Type.Bytes: + return Symbol.Bytes; + case Schema.Type.Fixed: + return Symbol.NewSeq(new Symbol.IntCheckAction(((FixedSchema)sc).Size), Symbol.Fixed); + case Schema.Type.Enumeration: + return Symbol.NewSeq(new Symbol.IntCheckAction(((EnumSchema)sc).Symbols.Count), Symbol.Enum); + case Schema.Type.Array: + return Symbol.NewSeq( + Symbol.NewRepeat(Symbol.ArrayEnd, Generate(((ArraySchema)sc).ItemSchema, seen)), + Symbol.ArrayStart); + case Schema.Type.Map: + return Symbol.NewSeq( + Symbol.NewRepeat(Symbol.MapEnd, Generate(((MapSchema)sc).ValueSchema, seen), Symbol.String), + Symbol.MapStart); + case Schema.Type.Record: + { + LitS wsc = new LitS(sc); + if (!seen.TryGetValue(wsc, out Symbol rresult)) + { + Symbol[] production = new Symbol[((RecordSchema)sc).Fields.Count]; + + // We construct a symbol without filling the array. Please see + // for the reason. + rresult = Symbol.NewSeq(production); + seen[wsc] = rresult; + + int j = production.Length; + foreach (Field f in ((RecordSchema)sc).Fields) + { + production[--j] = Generate(f.Schema, seen); + } + } + + return rresult; + } + case Schema.Type.Union: + IList subs = ((UnionSchema)sc).Schemas; + Symbol[] symbols = new Symbol[subs.Count]; + string[] labels = new string[subs.Count]; + + int i = 0; + foreach (Schema b in ((UnionSchema)sc).Schemas) + { + symbols[i] = Generate(b, seen); + labels[i] = b.Fullname; + i++; + } + + return Symbol.NewSeq(Symbol.NewAlt(symbols, labels), Symbol.Union); + case Schema.Type.Logical: + return Generate((sc as LogicalSchema).BaseSchema, seen); + default: + throw new Exception("Unexpected schema type"); + } + } + + /// + /// A wrapper around Schema that does "==" equality. + /// + protected class LitS + { + private readonly Schema actual; + + /// + /// Initializes a new instance of the class. + /// + public LitS(Schema actual) + { + this.actual = actual; + } + + /// + /// Two LitS are equal if and only if their underlying schema is the same (not + /// merely equal). + /// + public override bool Equals(object o) + { + if (o is null) + { + return false; + } + + if (Object.ReferenceEquals(this, o)) + { + return true; + } + + if (GetType() != o.GetType()) + { + return false; + } + + return actual.Equals(((LitS)o).actual); + } + + /// + /// Returns the hash code for the current . + /// + public override int GetHashCode() + { + return actual.GetHashCode(); + } + } + } +} diff --git a/lang/csharp/src/apache/main/IO/ParsingDecoder.cs b/lang/csharp/src/apache/main/IO/ParsingDecoder.cs new file mode 100644 index 00000000000..ce327613306 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/ParsingDecoder.cs @@ -0,0 +1,205 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Avro.IO.Parsing; + +namespace Avro.IO +{ + /// + /// Base class for a -based + /// s. + /// + public abstract class ParsingDecoder : Decoder, Parser.IActionHandler, SkipParser.ISkipHandler + { + /// + public abstract void ReadNull(); + + /// + public abstract bool ReadBoolean(); + + /// + public abstract int ReadInt(); + + /// + public abstract long ReadLong(); + + /// + public abstract float ReadFloat(); + + /// + public abstract double ReadDouble(); + + /// + public abstract byte[] ReadBytes(); + + /// + public abstract string ReadString(); + + /// + public abstract int ReadEnum(); + + /// + public abstract long ReadArrayStart(); + + /// + public abstract long ReadArrayNext(); + + /// + public abstract long ReadMapStart(); + + /// + public abstract long ReadMapNext(); + + /// + public abstract int ReadUnionIndex(); + + /// + public abstract void ReadFixed(byte[] buffer); + + /// + public abstract void ReadFixed(byte[] buffer, int start, int length); + + /// + public abstract void SkipNull(); + + /// + public abstract void SkipBoolean(); + + /// + public abstract void SkipInt(); + + /// + public abstract void SkipLong(); + + /// + public abstract void SkipFloat(); + + /// + public abstract void SkipDouble(); + + /// + public abstract void SkipBytes(); + + /// + public abstract void SkipString(); + + /// + public abstract void SkipEnum(); + + /// + public abstract void SkipUnionIndex(); + + /// + public abstract void SkipFixed(int len); + + /// + /// Skips an array on the stream. + /// + public abstract void SkipArray(); + + /// + /// Skips a map on the stream. + /// + public abstract void SkipMap(); + + /// + public abstract Symbol DoAction(Symbol input, Symbol top); + + /// + /// The parser. + /// + protected readonly SkipParser Parser; + + /// + /// Initializes a new instance of the class. + /// + protected ParsingDecoder(Symbol root) + { + Parser = new SkipParser(root, this, this); + } + + /// + /// Skips a fixed type on the stream. + /// + protected abstract void SkipFixed(); + + /// + public virtual void SkipAction() + { + Parser.PopSymbol(); + } + + /// + public virtual void SkipTopSymbol() + { + Symbol top = Parser.TopSymbol(); + if (top == Symbol.Null) + { + ReadNull(); + } + else if (top == Symbol.Boolean) + { + ReadBoolean(); + } + else if (top == Symbol.Int) + { + ReadInt(); + } + else if (top == Symbol.Long) + { + ReadLong(); + } + else if (top == Symbol.Float) + { + ReadFloat(); + } + else if (top == Symbol.Double) + { + ReadDouble(); + } + else if (top == Symbol.String) + { + SkipString(); + } + else if (top == Symbol.Bytes) + { + SkipBytes(); + } + else if (top == Symbol.Enum) + { + ReadEnum(); + } + else if (top == Symbol.Fixed) + { + SkipFixed(); + } + else if (top == Symbol.Union) + { + ReadUnionIndex(); + } + else if (top == Symbol.ArrayStart) + { + SkipArray(); + } + else if (top == Symbol.MapStart) + { + SkipMap(); + } + } + } +} diff --git a/lang/csharp/src/apache/main/IO/ParsingEncoder.cs b/lang/csharp/src/apache/main/IO/ParsingEncoder.cs new file mode 100644 index 00000000000..637a6e3465a --- /dev/null +++ b/lang/csharp/src/apache/main/IO/ParsingEncoder.cs @@ -0,0 +1,146 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Avro.IO +{ + /// + /// Base class for a -based + /// s. + /// + public abstract class ParsingEncoder : Encoder + { + /// + /// Tracks the number of items that remain to be written in the collections + /// (array or map). + /// + private long[] counts = new long[10]; + + /// + /// Position into the counts stack. + /// + protected int Pos = -1; + + /// + public abstract void WriteNull(); + + /// + public abstract void WriteBoolean(bool value); + + /// + public abstract void WriteInt(int value); + + /// + public abstract void WriteLong(long value); + + /// + public abstract void WriteFloat(float value); + + /// + public abstract void WriteDouble(double value); + + /// + public abstract void WriteBytes(byte[] value); + + /// + public abstract void WriteBytes(byte[] value, int offset, int length); + + /// + public abstract void WriteString(string value); + + /// + public abstract void WriteEnum(int value); + + /// + public abstract void WriteArrayStart(); + + /// + public abstract void WriteArrayEnd(); + + /// + public abstract void WriteMapStart(); + + /// + public abstract void WriteMapEnd(); + + /// + public abstract void WriteUnionIndex(int value); + + /// + public abstract void WriteFixed(byte[] data); + + /// + public abstract void WriteFixed(byte[] data, int start, int len); + + /// + public abstract void Flush(); + + /// + public virtual void SetItemCount(long value) + { + if (counts[Pos] != 0) + { + throw new AvroTypeException("Incorrect number of items written. " + counts[Pos] + + " more required."); + } + + counts[Pos] = value; + } + + /// + public virtual void StartItem() + { + counts[Pos]--; + } + + /// + /// Push a new collection on to the stack. + /// + protected void Push() + { + if (++Pos == counts.Length) + { + Array.Resize(ref counts, Pos + 10); + } + + counts[Pos] = 0; + } + + /// + /// Pop a new collection on to the stack. + /// + protected void Pop() + { + if (counts[Pos] != 0) + { + throw new AvroTypeException("Incorrect number of items written. " + counts[Pos] + " more required."); + } + + Pos--; + } + + /// + /// Returns the position into the stack. + /// + protected int Depth() + { + return Pos; + } + } +} diff --git a/lang/csharp/src/apache/main/IO/Resolver.cs b/lang/csharp/src/apache/main/IO/Resolver.cs index c77aca7698d..60d7966cfea 100644 --- a/lang/csharp/src/apache/main/IO/Resolver.cs +++ b/lang/csharp/src/apache/main/IO/Resolver.cs @@ -158,6 +158,10 @@ public static void EncodeDefaultValue(Encoder enc, Schema schema, JToken jtok) EncodeDefaultValue(enc, (schema as UnionSchema).Schemas[0], jtok); break; + case Schema.Type.Logical: + EncodeDefaultValue(enc, (schema as LogicalSchema).BaseSchema, jtok); + break; + default: throw new AvroException("Unsupported schema type " + schema.Tag); } diff --git a/lang/csharp/src/apache/main/Protocol/Message.cs b/lang/csharp/src/apache/main/Protocol/Message.cs index 732438c9370..19cc61c84fe 100644 --- a/lang/csharp/src/apache/main/Protocol/Message.cs +++ b/lang/csharp/src/apache/main/Protocol/Message.cs @@ -198,12 +198,10 @@ public override bool Equals(Object obj) /// public override int GetHashCode() { -#pragma warning disable CA1307 // Specify StringComparison return Name.GetHashCode() + -#pragma warning restore CA1307 // Specify StringComparison - Request.GetHashCode() + - (Response == null ? 0 : Response.GetHashCode()) + - (Error == null ? 0 : Error.GetHashCode()); + Request.GetHashCode() + + (Response == null ? 0 : Response.GetHashCode()) + + (Error == null ? 0 : Error.GetHashCode()); } /// diff --git a/lang/csharp/src/apache/main/Protocol/Protocol.cs b/lang/csharp/src/apache/main/Protocol/Protocol.cs index 1f5b5410605..94ae1268a0d 100644 --- a/lang/csharp/src/apache/main/Protocol/Protocol.cs +++ b/lang/csharp/src/apache/main/Protocol/Protocol.cs @@ -269,10 +269,8 @@ private bool MessagesEquals(IDictionary that) /// public override int GetHashCode() { -#pragma warning disable CA1307 // Specify StringComparison return Name.GetHashCode() + Namespace.GetHashCode() + -#pragma warning restore CA1307 // Specify StringComparison - GetTypesHashCode() + GetMessagesHashCode(); + GetTypesHashCode() + GetMessagesHashCode(); } /// @@ -295,9 +293,10 @@ private int GetMessagesHashCode() { int hash = Messages.Count; foreach (KeyValuePair pair in Messages) -#pragma warning disable CA1307 // Specify StringComparison + { hash += pair.Key.GetHashCode() + pair.Value.GetHashCode(); -#pragma warning restore CA1307 // Specify StringComparison + } + return hash; } } diff --git a/lang/csharp/src/apache/main/Reflect/ArraySchemaExtensions.cs b/lang/csharp/src/apache/main/Reflect/ArraySchemaExtensions.cs index 33ae5f55682..10466cc8ae6 100644 --- a/lang/csharp/src/apache/main/Reflect/ArraySchemaExtensions.cs +++ b/lang/csharp/src/apache/main/Reflect/ArraySchemaExtensions.cs @@ -27,7 +27,7 @@ public static class ArraySchemaExtensions /// Return the name of the array helper /// /// this - /// value of the helper metadata - null if it isnt present + /// value of the helper metadata - null if it isn't present public static string GetHelper(this ArraySchema ars) { string s = null; diff --git a/lang/csharp/src/apache/main/Reflect/ClassCache.cs b/lang/csharp/src/apache/main/Reflect/ClassCache.cs index 430efffbf03..a64e06d2609 100644 --- a/lang/csharp/src/apache/main/Reflect/ClassCache.cs +++ b/lang/csharp/src/apache/main/Reflect/ClassCache.cs @@ -76,7 +76,7 @@ public static void AddDefaultConverter(Func /// /// - /// The first matching converter - null if there isnt one + /// The first matching converter - null if there isn't one public IAvroFieldConverter GetDefaultConverter(Avro.Schema.Type tag, Type propType) { Type avroType; @@ -159,7 +159,7 @@ public void AddArrayHelper(string name, Type helperType) public ArrayHelper GetArrayHelper(ArraySchema schema, IEnumerable enumerable) { Type h; - // note ArraySchema is unamed and doesnt have a FulllName, use "helper" metadata + // note ArraySchema is unnamed and doesn't have a FulllName, use "helper" metadata // metadata is json string, strip quotes string s = null; s = schema.GetHelper(); @@ -263,17 +263,27 @@ public void LoadClassCache(Type objType, Schema s) EnumCache.AddEnumNameMapItem(ns, objType); break; case UnionSchema us: - if (us.Schemas.Count == 2 && (us.Schemas[0].Tag == Schema.Type.Null || us.Schemas[1].Tag == Schema.Type.Null) && objType.IsClass) + if (us.Schemas.Count == 2 && (us.Schemas[0].Tag == Schema.Type.Null || us.Schemas[1].Tag == Schema.Type.Null)) { // in this case objType will match the non null type in the union foreach (var o in us.Schemas) { - if (o.Tag != Schema.Type.Null) + if (o.Tag == Schema.Type.Null) + { + continue; + } + + if (objType.IsClass) { LoadClassCache(objType, o); } - } + var innerType = Nullable.GetUnderlyingType(objType); + if (innerType != null && innerType.IsEnum) + { + LoadClassCache(innerType, o); + } + } } else { diff --git a/lang/csharp/src/apache/main/Reflect/DotnetClass.cs b/lang/csharp/src/apache/main/Reflect/DotnetClass.cs index 5bef040f631..78eaca52224 100644 --- a/lang/csharp/src/apache/main/Reflect/DotnetClass.cs +++ b/lang/csharp/src/apache/main/Reflect/DotnetClass.cs @@ -52,14 +52,14 @@ public DotnetClass(Type t, RecordSchema r, ClassCache cache) if (avroAttr != null) { hasAttribute = true; - _propertyMap.TryAdd(f.Name, new DotnetProperty(prop, f.Schema.Tag, avroAttr.Converter, cache)); + _propertyMap.TryAdd(f.Name, new DotnetProperty(prop, f.Schema, avroAttr.Converter, cache)); break; } } if (!hasAttribute) { - _propertyMap.TryAdd(f.Name, new DotnetProperty(prop, f.Schema.Tag, cache)); + _propertyMap.TryAdd(f.Name, new DotnetProperty(prop, f.Schema, cache)); } } } @@ -83,7 +83,7 @@ private PropertyInfo GetPropertyInfo(Field f) } } - throw new AvroException($"Class {_type.Name} doesnt contain property {f.Name}"); + throw new AvroException($"Class {_type.Name} doesn't contain property {f.Name}"); } /// @@ -97,7 +97,7 @@ public object GetValue(object o, Field f) DotnetProperty p; if (!_propertyMap.TryGetValue(f.Name, out p)) { - throw new AvroException($"ByPosClass doesnt contain property {f.Name}"); + throw new AvroException($"ByPosClass doesn't contain property {f.Name}"); } return p.GetValue(o, f.Schema); @@ -108,13 +108,13 @@ public object GetValue(object o, Field f) /// /// the object /// field schema - /// value for the proprty referenced by the field schema + /// value for the property referenced by the field schema public void SetValue(object o, Field f, object v) { DotnetProperty p; if (!_propertyMap.TryGetValue(f.Name, out p)) { - throw new AvroException($"ByPosClass doesnt contain property {f.Name}"); + throw new AvroException($"ByPosClass doesn't contain property {f.Name}"); } p.SetValue(o, v, f.Schema); @@ -139,7 +139,7 @@ public Type GetPropertyType(Field f) DotnetProperty p; if (!_propertyMap.TryGetValue(f.Name, out p)) { - throw new AvroException($"ByPosClass doesnt contain property {f.Name}"); + throw new AvroException($"ByPosClass doesn't contain property {f.Name}"); } return p.GetPropertyType(); diff --git a/lang/csharp/src/apache/main/Reflect/DotnetProperty.cs b/lang/csharp/src/apache/main/Reflect/DotnetProperty.cs index 4ddcdc69df0..42ae766bd23 100644 --- a/lang/csharp/src/apache/main/Reflect/DotnetProperty.cs +++ b/lang/csharp/src/apache/main/Reflect/DotnetProperty.cs @@ -28,9 +28,10 @@ internal class DotnetProperty public IAvroFieldConverter Converter { get; set; } - private bool IsPropertyCompatible(Avro.Schema.Type schemaTag) + private bool IsPropertyCompatible(Avro.Schema schema) { Type propType; + var schemaTag = schema.Tag; if (Converter == null) { @@ -74,21 +75,25 @@ private bool IsPropertyCompatible(Avro.Schema.Type schemaTag) return propType == typeof(byte[]); case Avro.Schema.Type.Error: return propType.IsClass; + case Avro.Schema.Type.Logical: + var logicalSchema = (LogicalSchema)schema; + var type = logicalSchema.LogicalType.GetCSharpType(false); + return type == propType; } return false; } - public DotnetProperty(PropertyInfo property, Avro.Schema.Type schemaTag, IAvroFieldConverter converter, ClassCache cache) + public DotnetProperty(PropertyInfo property, Avro.Schema schema, IAvroFieldConverter converter, ClassCache cache) { _property = property; Converter = converter; - if (!IsPropertyCompatible(schemaTag)) + if (!IsPropertyCompatible(schema)) { if (Converter == null) { - var c = cache.GetDefaultConverter(schemaTag, _property.PropertyType); + var c = cache.GetDefaultConverter(schema.Tag, _property.PropertyType); if (c != null) { Converter = c; @@ -96,12 +101,12 @@ public DotnetProperty(PropertyInfo property, Avro.Schema.Type schemaTag, IAvroF } } - throw new AvroException($"Property {property.Name} in object {property.DeclaringType} isn't compatible with Avro schema type {schemaTag}"); + throw new AvroException($"Property {property.Name} in object {property.DeclaringType} isn't compatible with Avro schema type {schema.Tag}"); } } - public DotnetProperty(PropertyInfo property, Avro.Schema.Type schemaTag, ClassCache cache) - : this(property, schemaTag, null, cache) + public DotnetProperty(PropertyInfo property, Avro.Schema schema, ClassCache cache) + : this(property, schema, null, cache) { } diff --git a/lang/csharp/src/apache/main/Reflect/EnumCache.cs b/lang/csharp/src/apache/main/Reflect/EnumCache.cs index 7fbfc998d12..463758915f4 100644 --- a/lang/csharp/src/apache/main/Reflect/EnumCache.cs +++ b/lang/csharp/src/apache/main/Reflect/EnumCache.cs @@ -48,7 +48,7 @@ public static Type GetEnumeration(NamedSchema schema) Type t; if (!_nameEnumMap.TryGetValue(schema.Fullname, out t)) { - throw new AvroException($"Couldnt find enumeration for avro fullname: {schema.Fullname}"); + throw new AvroException($"Couldn't find enumeration for avro fullname: {schema.Fullname}"); } return t; diff --git a/lang/csharp/src/apache/main/Reflect/README.md b/lang/csharp/src/apache/main/Reflect/README.md index 3573c6a309e..e3cb2e4cc48 100644 --- a/lang/csharp/src/apache/main/Reflect/README.md +++ b/lang/csharp/src/apache/main/Reflect/README.md @@ -1,12 +1,12 @@ # Namespace Avro.Reflect -This namespace contains classes that implement Avro serialization and deserialization for plain C# objects. The classes use .net reflection to implement the serializers. The interface is similar to the Generic and Specific serialiation classes. +This namespace contains classes that implement Avro serialization and deserialization for plain C# objects. The classes use .net reflection to implement the serializers. The interface is similar to the Generic and Specific serialization classes. ## Serialization The approach starts with the schema and iterates both the schema and the dotnet type together in a depth first manner per the specification. Serialization is the same as the Generic serializer except where the serializer encounters: - *A fixed type*: if the corresponding dotnet object type is a byte[] of the correct length then the object is serialized, otherwise an exception is thrown. -- *A record type*: the serializer matches the schema property name to the dotnet object property name and then reursively serializes the schema property and the dotnet object property +- *A record type*: the serializer matches the schema property name to the dotnet object property name and then recursively serializes the schema property and the dotnet object property - *An array type*: See array serialization/deserialization. Basic serialization is performed as in the following example: @@ -37,7 +37,7 @@ You might want to do this if your class contains interfaces and/or if you use an See the section on Arrays. The ArrayHelper specifies the type of object created when an array is deserialized. The default is List\. -The type created for Map objects is specified by the Deserializer property MapType. *This must be a two (or more) parameter generic type where the first type paramater is string and the second is undefined* e.g. Dictionary. +The type created for Map objects is specified by the Deserializer property MapType. *This must be a two (or more) parameter generic type where the first type parameter is string and the second is undefined* e.g. Dictionary. ```csharp public Type MapType { get; set; } ``` diff --git a/lang/csharp/src/apache/main/Reflect/ReflectDefaultReader.cs b/lang/csharp/src/apache/main/Reflect/ReflectDefaultReader.cs index 676d9f39d7b..034cb89f88e 100644 --- a/lang/csharp/src/apache/main/Reflect/ReflectDefaultReader.cs +++ b/lang/csharp/src/apache/main/Reflect/ReflectDefaultReader.cs @@ -50,7 +50,7 @@ public class ReflectDefaultReader : SpecificDefaultReader /// /// Delegate to a factory method to create objects of type x. If you are deserializing to interfaces - /// you could use an IoC container factory insread of the default. Default is Activator.CreateInstance() + /// you could use an IoC container factory instead of the default. Default is Activator.CreateInstance() /// /// public Func RecordFactory { get => _recordFactory; set => _recordFactory = value; } @@ -176,7 +176,7 @@ internal Type GetTypeFromSchema(Schema schema, bool nullable) throw new Exception("Unable to cast schema into a union schema"); } - Schema nullibleType = CodeGen.getNullableType(unionSchema); + Schema nullibleType = CodeGen.GetNullableType(unionSchema); if (nullibleType == null) { return typeof(object); @@ -373,7 +373,7 @@ public object GetDefaultValue(Schema s, JToken defaultValue) /// /// Deserializes a enum. Uses CreateEnum to construct the new enum object. /// - /// If appropirate, uses this instead of creating a new enum object. + /// If appropriate, uses this instead of creating a new enum object. /// The schema the writer used while writing the enum /// The schema the reader is using /// The decoder for deserialization. @@ -450,10 +450,10 @@ protected override object ReadRecord(object reuse, RecordSchema writerSchema, Sc /// /// If appropriate, uses this object instead of creating a new one. /// The FixedSchema the writer used during serialization. - /// The schema that the readr uses. Must be a FixedSchema with the same + /// The schema that the reader uses. Must be a FixedSchema with the same /// size as the writerSchema. /// The decoder for deserialization. - /// The deserilized object. + /// The deserialized object. protected override object ReadFixed(object reuse, FixedSchema writerSchema, Schema readerSchema, Decoder d) { FixedSchema rs = readerSchema as FixedSchema; diff --git a/lang/csharp/src/apache/main/Reflect/ReflectDefaultWriter.cs b/lang/csharp/src/apache/main/Reflect/ReflectDefaultWriter.cs index a6397c65001..e5ef4a2124c 100644 --- a/lang/csharp/src/apache/main/Reflect/ReflectDefaultWriter.cs +++ b/lang/csharp/src/apache/main/Reflect/ReflectDefaultWriter.cs @@ -199,6 +199,8 @@ protected override bool Matches(Schema sc, object obj) return false; // Union directly within another union not allowed! case Schema.Type.Fixed: return obj is byte[]; + case Schema.Type.Logical: + return ((LogicalSchema)sc).LogicalType.IsInstanceOfLogicalType(obj); default: throw new AvroException("Unknown schema type: " + sc.Tag); } diff --git a/lang/csharp/src/apache/main/Reflect/ReflectReader.cs b/lang/csharp/src/apache/main/Reflect/ReflectReader.cs index 0c2df58a9a5..e39e30d32cf 100644 --- a/lang/csharp/src/apache/main/Reflect/ReflectReader.cs +++ b/lang/csharp/src/apache/main/Reflect/ReflectReader.cs @@ -72,7 +72,7 @@ public ReflectReader(ReflectDefaultReader reader) /// Generic read function /// /// object to store data read - /// decorder to use for reading data + /// decoder to use for reading data /// public T Read(T reuse, Decoder dec) { @@ -82,7 +82,7 @@ public T Read(T reuse, Decoder dec) /// /// Generic read function /// - /// decorder to use for reading data + /// decoder to use for reading data /// public T Read(Decoder dec) { diff --git a/lang/csharp/src/apache/main/Schema/Aliases.cs b/lang/csharp/src/apache/main/Schema/Aliases.cs new file mode 100644 index 00000000000..6574e3163d6 --- /dev/null +++ b/lang/csharp/src/apache/main/Schema/Aliases.cs @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.Collections.Generic; +using System.Linq; + +namespace Avro +{ + internal static class Aliases + { + internal static IList GetSchemaNames(IEnumerable aliases, string enclosingTypeName, string enclosingTypeNamespace) + { + if (aliases == null) + { + return null; + } + + SchemaName enclosingSchemaName = new SchemaName(enclosingTypeName, enclosingTypeNamespace, null, null); + return aliases.Select(alias => new SchemaName(alias, enclosingSchemaName.Namespace, null, null)).ToList(); + } + } +} diff --git a/lang/csharp/src/apache/main/Schema/ArraySchema.cs b/lang/csharp/src/apache/main/Schema/ArraySchema.cs index 5b4e6a434e0..7c4d8e1a9c8 100644 --- a/lang/csharp/src/apache/main/Schema/ArraySchema.cs +++ b/lang/csharp/src/apache/main/Schema/ArraySchema.cs @@ -29,7 +29,7 @@ public class ArraySchema : UnnamedSchema /// /// Schema for the array 'type' attribute /// - public Schema ItemSchema { get; set; } + public Schema ItemSchema { get; set; } /// /// Static class to return a new instance of ArraySchema @@ -48,14 +48,25 @@ internal static ArraySchema NewInstance(JToken jtok, PropertyMap props, SchemaNa } /// - /// Constructor + /// Creates a new /// - /// schema for the array items type - /// dictionary that provides access to custom properties - private ArraySchema(Schema items, PropertyMap props) : base(Type.Array, props) + /// Schema for the array items type + /// Dictionary that provides access to custom properties + /// + public static ArraySchema Create(Schema items, PropertyMap customAttributes = null) + { + return new ArraySchema(items, customAttributes); + } + + /// + /// Initializes a new instance of the class. + /// + /// Schema for the array items type + /// Dictionary that provides access to custom properties + private ArraySchema(Schema items, PropertyMap customAttributes) + : base(Type.Array, customAttributes) { - if (null == items) throw new ArgumentNullException(nameof(items)); - this.ItemSchema = items; + ItemSchema = items ?? throw new ArgumentNullException(nameof(items)); } /// diff --git a/lang/csharp/src/apache/main/Schema/EnumSchema.cs b/lang/csharp/src/apache/main/Schema/EnumSchema.cs index 3fd14500414..225780310a6 100644 --- a/lang/csharp/src/apache/main/Schema/EnumSchema.cs +++ b/lang/csharp/src/apache/main/Schema/EnumSchema.cs @@ -17,7 +17,8 @@ */ using System; using System.Collections.Generic; -using System.Text; +using System.Linq; +using System.Text.RegularExpressions; using Newtonsoft.Json.Linq; namespace Avro @@ -30,7 +31,7 @@ public class EnumSchema : NamedSchema /// /// List of strings representing the enum symbols /// - public IList Symbols { get; private set; } + public IList Symbols { get; private set; } /// /// The default token to use when deserializing an enum when the provided token is not found @@ -47,6 +48,34 @@ public class EnumSchema : NamedSchema /// public int Count { get { return Symbols.Count; } } + /// + /// Initializes a new instance of the class. + /// + /// Name of enum + /// Namespace of enum + /// List of aliases for the name + /// List of enum symbols + /// Custom properties on this schema + /// Documentation for this named schema + /// + public static EnumSchema Create(string name, + IEnumerable symbols, + string space = null, + IEnumerable aliases = null, + PropertyMap customProperties = null, + string doc = null, + string defaultSymbol = null) + { + return new EnumSchema(new SchemaName(name, space, null, doc), + Aliases.GetSchemaNames(aliases, name, space), + symbols.ToList(), + CreateSymbolsMap(symbols), + customProperties, + new SchemaNames(), + doc, + defaultSymbol); + } + /// /// Static function to return new instance of EnumSchema /// @@ -81,7 +110,7 @@ internal static EnumSchema NewInstance(JToken jtok, PropertyMap props, SchemaNam return new EnumSchema(name, aliases, symbols, symbolMap, props, names, JsonHelper.GetOptionalString(jtok, "doc"), JsonHelper.GetOptionalString(jtok, "default")); } - catch (SchemaParseException e) + catch (AvroException e) { throw new SchemaParseException($"{e.Message} at '{jtok.Path}'", e); } @@ -103,15 +132,49 @@ private EnumSchema(SchemaName name, IList aliases, List symb string doc, string defaultSymbol) : base(Type.Enumeration, name, aliases, props, names, doc) { - if (null == name.Name) throw new SchemaParseException("name cannot be null for enum schema."); + if (null == name.Name) throw new AvroException("name cannot be null for enum schema."); this.Symbols = symbols; this.symbolMap = symbolMap; if (null != defaultSymbol && !symbolMap.ContainsKey(defaultSymbol)) - throw new SchemaParseException($"Default symbol: {defaultSymbol} not found in symbols"); + throw new AvroException($"Default symbol: {defaultSymbol} not found in symbols"); Default = defaultSymbol; } + /// + /// Creates symbols map from specified list of symbols. + /// Symbol map contains the names of the symbols and their index. + /// + /// List of symbols + /// Symbol map + /// Is thrown if the symbols list contains invalid symbol name or duplicate symbols + private static IDictionary CreateSymbolsMap(IEnumerable symbols) + { + IDictionary symbolMap = new Dictionary(); + int i = 0; + foreach (var symbol in symbols) + { + ValidateSymbolName(symbol); + + if (symbolMap.ContainsKey(symbol)) + { + throw new AvroException($"Duplicate symbol: {symbol}"); + } + + symbolMap[symbol] = i++; + } + + return symbolMap; + } + + private static void ValidateSymbolName(string symbol) + { + if(string.IsNullOrEmpty(symbol) || !Regex.IsMatch(symbol, "^([A-Za-z_][A-Za-z0-9_]*)$")) + { + throw new AvroException($"Invalid symbol name: {symbol}"); + } + } + /// /// Writes enum schema in JSON format /// @@ -127,7 +190,7 @@ protected internal override void WriteJsonFields(Newtonsoft.Json.JsonTextWriter foreach (string s in this.Symbols) writer.WriteValue(s); writer.WriteEndArray(); - if (null != Default) + if (null != Default) { writer.WritePropertyName("default"); writer.WriteValue(Default); @@ -139,16 +202,24 @@ protected internal override void WriteJsonFields(Newtonsoft.Json.JsonTextWriter /// Throws AvroException if the symbol is not found in this enum. /// /// name of the symbol to find - /// position of the given symbol in this enum schema + /// + /// position of the given symbol in this enum schema + /// + /// No such symbol: {symbol} public int Ordinal(string symbol) { int result; if (symbolMap.TryGetValue(symbol, out result)) + { return result; - if (null != Default) - return symbolMap[Default]; + } + + if (Default != null && symbolMap.TryGetValue(Default, out result)) + { + return result; + } - throw new AvroException("No such symbol: " + symbol); + throw new AvroException($"No such symbol: {symbol}"); } /// @@ -218,9 +289,11 @@ public override bool Equals(object obj) public override int GetHashCode() { int result = SchemaName.GetHashCode() + getHashCode(Props); -#pragma warning disable CA1307 // Specify StringComparison - foreach (string s in Symbols) result += 23 * s.GetHashCode(); -#pragma warning restore CA1307 // Specify StringComparison + foreach (string s in Symbols) + { + result += 23 * s.GetHashCode(); + } + return result; } diff --git a/lang/csharp/src/apache/main/Schema/Field.cs b/lang/csharp/src/apache/main/Schema/Field.cs index bdfe9282cb3..799f265b320 100644 --- a/lang/csharp/src/apache/main/Schema/Field.cs +++ b/lang/csharp/src/apache/main/Schema/Field.cs @@ -54,18 +54,10 @@ public enum SortOrder /// public readonly string Name; - /// - /// List of aliases for the field name - /// - [Obsolete("Use Aliases instead. This will be removed from the public API in a future version.")] - public readonly IList aliases; - -#pragma warning disable CS0618 // Type or member is obsolete /// /// List of aliases for the field name. /// - public IList Aliases => aliases; -#pragma warning restore CS0618 // Type or member is obsolete + public IList Aliases { get; private set; } /// /// Position of the field within its record. @@ -103,15 +95,42 @@ public enum SortOrder /// /// Static comparer object for JSON objects such as the fields default value /// - internal static JTokenEqualityComparer JtokenEqual = new JTokenEqualityComparer(); + internal readonly static JTokenEqualityComparer JtokenEqual = new JTokenEqualityComparer(); /// - /// A flag to indicate if reader schema has a field that is missing from writer schema and has a default value - /// This is set in CanRead() which is always be called before deserializing data + /// Initializes a new instance of the class. /// + /// schema for the field type. + /// name of the field. + /// list of aliases for the name of the field. + /// position of the field. + /// documentation for the field. + /// field's default value if it exists. + /// sort order of the field. + /// dictionary that provides access to custom properties. + public Field(Schema schema, + string name, + int pos, + IList aliases = null, + string doc = null, + JToken defaultValue = null, + SortOrder sortorder = SortOrder.ignore, + PropertyMap customProperties = null) + : this(schema, name, aliases, pos, doc, defaultValue, sortorder, customProperties) + { + } /// - /// Constructor for the field class + /// Creates a new field based on the specified field, with a different position. + /// + /// A clone of this field with new position. + internal Field ChangePosition(int newPosition) + { + return new Field(Schema, Name, newPosition, Aliases, Documentation, DefaultValue, Ordering ?? SortOrder.ignore, Props); + } + + /// + /// Initializes a new instance of the class. /// /// schema for the field type /// name of the field @@ -121,21 +140,27 @@ public enum SortOrder /// field's default value if it exists /// sort order of the field /// dictionary that provides access to custom properties + /// + /// name - name cannot be null. + /// or + /// type - type cannot be null. + /// internal Field(Schema schema, string name, IList aliases, int pos, string doc, JToken defaultValue, SortOrder sortorder, PropertyMap props) { - if (string.IsNullOrEmpty(name)) throw new ArgumentNullException(nameof(name), "name cannot be null."); - if (null == schema) throw new ArgumentNullException("type", "type cannot be null."); - this.Schema = schema; - this.Name = name; -#pragma warning disable CS0618 // Type or member is obsolete - this.aliases = aliases; -#pragma warning restore CS0618 // Type or member is obsolete - this.Pos = pos; - this.Documentation = doc; - this.DefaultValue = defaultValue; - this.Ordering = sortorder; - this.Props = props; + if (string.IsNullOrEmpty(name)) + { + throw new ArgumentNullException(nameof(name), "name cannot be null."); + } + + Schema = schema ?? throw new ArgumentNullException("type", "type cannot be null."); + Name = name; + Aliases = aliases; + Pos = pos; + Documentation = doc; + DefaultValue = defaultValue; + Ordering = sortorder; + Props = props; } /// diff --git a/lang/csharp/src/apache/main/Schema/FixedSchema.cs b/lang/csharp/src/apache/main/Schema/FixedSchema.cs index b16c1ff1dcb..2b24e6b8689 100644 --- a/lang/csharp/src/apache/main/Schema/FixedSchema.cs +++ b/lang/csharp/src/apache/main/Schema/FixedSchema.cs @@ -32,6 +32,20 @@ public class FixedSchema : NamedSchema /// public int Size { get; set; } + /// + /// Initializes a new instance of the class. + /// + /// Name of the fixed schema + /// List of aliases for the name + /// Fixed size + /// Namespace of fixed + /// Custom properties on this schema + /// Documentation for this named schema + public static FixedSchema Create(string name, int size, string space = null, IEnumerable aliases = null, PropertyMap customProperties = null, string doc = null) + { + return new FixedSchema(new SchemaName(name, space, null, doc), Aliases.GetSchemaNames(aliases, name, space), size, customProperties, new SchemaNames(), doc); + } + /// /// Static function to return new instance of the fixed schema class /// diff --git a/lang/csharp/src/apache/main/Schema/JsonHelper.cs b/lang/csharp/src/apache/main/Schema/JsonHelper.cs index 87887f5cd27..ccdf8f7345a 100644 --- a/lang/csharp/src/apache/main/Schema/JsonHelper.cs +++ b/lang/csharp/src/apache/main/Schema/JsonHelper.cs @@ -82,7 +82,7 @@ public static int GetRequiredInteger(JToken jtok, string field) /// /// JSON object to read /// property name - /// null if property doesn't exist, otherise returns property boolean value + /// null if property doesn't exist, otherwise returns property boolean value public static bool? GetOptionalBoolean(JToken jtok, string field) { if (null == jtok) throw new ArgumentNullException(nameof(jtok), "jtok cannot be null."); @@ -98,7 +98,7 @@ public static int GetRequiredInteger(JToken jtok, string field) } /// - /// Writes JSON property name and value if value is not null + /// Writes JSON property name and value if value is not null or empty /// /// JSON writer /// property name @@ -110,5 +110,18 @@ internal static void writeIfNotNullOrEmpty(JsonTextWriter writer, string key, st writer.WriteValue(value); } + + /// + /// Write JSON property name and value, if value is not null + /// + /// JSON writer + /// property name + /// property value + internal static void writeIfNotNull(JsonTextWriter writer, string key, string value) + { + if (value == null) return; + writer.WritePropertyName(key); + writer.WriteValue(value); + } } } diff --git a/lang/csharp/src/apache/main/Schema/LogicalSchema.cs b/lang/csharp/src/apache/main/Schema/LogicalSchema.cs index 3c1928ee47f..49d04875d5b 100644 --- a/lang/csharp/src/apache/main/Schema/LogicalSchema.cs +++ b/lang/csharp/src/apache/main/Schema/LogicalSchema.cs @@ -47,15 +47,27 @@ internal static LogicalSchema NewInstance(JToken jtok, PropertyMap props, Schema JToken jtype = jtok["type"]; if (null == jtype) throw new AvroTypeException("Logical Type does not have 'type'"); - return new LogicalSchema(Schema.ParseJson(jtype, names, encspace), JsonHelper.GetRequiredString(jtok, "logicalType"), props); + JToken baseSchemaToken = jtype; + + if (jtok is JObject jo && jtype.Type == JTokenType.String) + { + string typeStr = (string)jtype; + if (typeStr == "record" || typeStr == "enum" || typeStr == "array" || typeStr == "map" || typeStr == "fixed") + { + var clone = (JObject)jo.DeepClone(); + clone.Property("logicalType")?.Remove(); + baseSchemaToken = clone; + } + } + + return new LogicalSchema(Schema.ParseJson(baseSchemaToken, names, encspace), JsonHelper.GetRequiredString(jtok, "logicalType"), props); } private LogicalSchema(Schema baseSchema, string logicalTypeName, PropertyMap props) : base(Type.Logical, props) { - if (null == baseSchema) throw new ArgumentNullException(nameof(baseSchema)); - BaseSchema = baseSchema; + BaseSchema = baseSchema ?? throw new ArgumentNullException(nameof(baseSchema)); LogicalTypeName = logicalTypeName; - LogicalType = LogicalTypeFactory.Instance.GetFromLogicalSchema(this); + LogicalType = LogicalTypeFactory.Instance.GetFromLogicalSchema(this, true); } /// @@ -76,6 +88,18 @@ protected internal override void WriteJson(Newtonsoft.Json.JsonTextWriter writer writer.WriteEndObject(); } + /// + public override string Name + { + get { return BaseSchema.Name; } + } + + /// + public override string Fullname + { + get { return BaseSchema.Fullname; } + } + /// /// Checks if this schema can read data written by the given schema. Used for decoding data. /// @@ -86,6 +110,7 @@ public override bool CanRead(Schema writerSchema) if (writerSchema.Tag != Tag) return false; LogicalSchema that = writerSchema as LogicalSchema; + return BaseSchema.CanRead(that.BaseSchema); } diff --git a/lang/csharp/src/apache/main/Schema/MapSchema.cs b/lang/csharp/src/apache/main/Schema/MapSchema.cs index 54bc05a8d31..a1a6a4222b9 100644 --- a/lang/csharp/src/apache/main/Schema/MapSchema.cs +++ b/lang/csharp/src/apache/main/Schema/MapSchema.cs @@ -36,10 +36,11 @@ public class MapSchema : UnnamedSchema /// Creates a new from the given schema. /// /// Schema to create the map schema from. + /// Dictionary that provides access to custom properties /// A new . - public static MapSchema CreateMap(Schema type) + public static MapSchema CreateMap(Schema type, PropertyMap customProperties = null) { - return new MapSchema(type,null); + return new MapSchema(type, customProperties); } /// @@ -67,12 +68,12 @@ internal static MapSchema NewInstance(JToken jtok, PropertyMap props, SchemaName /// /// Constructor for map schema class /// - /// schema for map values type - /// dictionary that provides access to custom properties - private MapSchema(Schema valueSchema, PropertyMap props) : base(Type.Map, props) + /// Schema for map values type + /// Dictionary that provides access to custom properties + private MapSchema(Schema valueSchema, PropertyMap cutsomProperties) + : base(Type.Map, cutsomProperties) { - if (null == valueSchema) throw new ArgumentNullException(nameof(valueSchema), "valueSchema cannot be null."); - this.ValueSchema = valueSchema; + ValueSchema = valueSchema ?? throw new ArgumentNullException(nameof(valueSchema), "valueSchema cannot be null."); } /// diff --git a/lang/csharp/src/apache/main/Schema/NamedSchema.cs b/lang/csharp/src/apache/main/Schema/NamedSchema.cs index ca54440806b..fe9d2145b1d 100644 --- a/lang/csharp/src/apache/main/Schema/NamedSchema.cs +++ b/lang/csharp/src/apache/main/Schema/NamedSchema.cs @@ -78,6 +78,7 @@ public override string Fullname internal static NamedSchema NewInstance(JObject jo, PropertyMap props, SchemaNames names, string encspace) { string type = JsonHelper.GetRequiredString(jo, "type"); + string doc = JsonHelper.GetOptionalString(jo, "doc"); switch (type) { case "fixed": @@ -90,7 +91,7 @@ internal static NamedSchema NewInstance(JObject jo, PropertyMap props, SchemaNam return RecordSchema.NewInstance(Type.Error, jo, props, names, encspace); default: NamedSchema result; - if (names.TryGetValue(type, null, encspace, out result)) + if (names.TryGetValue(type, null, encspace, doc, out result)) return result; return null; } @@ -128,7 +129,8 @@ protected static SchemaName GetName(JToken jtok, string encspace) { String n = JsonHelper.GetOptionalString(jtok, "name"); // Changed this to optional string for anonymous records in messages String ns = JsonHelper.GetOptionalString(jtok, "namespace"); - return new SchemaName(n, ns, encspace); + String d = JsonHelper.GetOptionalString(jtok, "doc"); + return new SchemaName(n, ns, encspace, d); } /// @@ -136,7 +138,7 @@ protected static SchemaName GetName(JToken jtok, string encspace) /// /// JSON object to read /// namespace of the name this alias is for - /// enclosing namespace of the name this alias is for + /// enclosing namespace of the name this alias is for /// List of SchemaName that represents the list of alias. If no 'aliases' specified, then it returns null. protected static IList GetAliases(JToken jtok, string space, string encspace) { @@ -153,7 +155,7 @@ protected static IList GetAliases(JToken jtok, string space, string if (jalias.Type != JTokenType.String) throw new SchemaParseException($"Aliases must be of format JSON array of strings at '{jtok.Path}'"); - aliases.Add(new SchemaName((string)jalias, space, encspace)); + aliases.Add(new SchemaName((string)jalias, space, encspace, null)); } return aliases; } diff --git a/lang/csharp/src/apache/main/Schema/PrimitiveSchema.cs b/lang/csharp/src/apache/main/Schema/PrimitiveSchema.cs index 1a55c2ff863..db5db2cb03e 100644 --- a/lang/csharp/src/apache/main/Schema/PrimitiveSchema.cs +++ b/lang/csharp/src/apache/main/Schema/PrimitiveSchema.cs @@ -16,8 +16,7 @@ * limitations under the License. */ using System; -using System.Collections.Generic; -using System.Text; +using System.Linq; using Newtonsoft.Json; namespace Avro @@ -31,11 +30,23 @@ public sealed class PrimitiveSchema : UnnamedSchema /// Constructor for primitive schema /// /// - /// dictionary that provides access to custom properties - private PrimitiveSchema(Type type, PropertyMap props) : base(type, props) + /// dictionary that provides access to custom properties + private PrimitiveSchema(Type type, PropertyMap customProperties) + : base(type, customProperties) { } + /// + /// Creates a new instance of + /// + /// The primitive type to create + /// Dictionary that provides access to custom properties + /// + public static PrimitiveSchema Create(Type type, PropertyMap customProperties = null) + { + return new PrimitiveSchema(type, customProperties); + } + /// /// Static function to return new instance of primitive schema /// @@ -82,7 +93,22 @@ public static PrimitiveSchema NewInstance(string type, PropertyMap props = null) /// protected internal override void WriteJson(JsonTextWriter w, SchemaNames names, string encspace) { - w.WriteValue(Name); + if(this.Props?.Any() == true) + { + w.WriteStartObject(); + w.WritePropertyName("type"); + w.WriteValue(Name); + foreach(var prop in Props) + { + w.WritePropertyName(prop.Key); + w.WriteRawValue(prop.Value); + } + w.WriteEndObject(); + } + else + { + w.WriteValue(Name); + } } /// diff --git a/lang/csharp/src/apache/main/Schema/Property.cs b/lang/csharp/src/apache/main/Schema/Property.cs index f4240721f1a..1774bebff87 100644 --- a/lang/csharp/src/apache/main/Schema/Property.cs +++ b/lang/csharp/src/apache/main/Schema/Property.cs @@ -36,7 +36,7 @@ public class PropertyMap : Dictionary /// Parses the custom properties from the given JSON object and stores them /// into the schema's list of custom properties /// - /// JSON object to prase + /// JSON object to parse public void Parse(JToken jtok) { JObject jo = jtok as JObject; diff --git a/lang/csharp/src/apache/main/Schema/RecordSchema.cs b/lang/csharp/src/apache/main/Schema/RecordSchema.cs index 6f01d0ca898..910bc466fe9 100644 --- a/lang/csharp/src/apache/main/Schema/RecordSchema.cs +++ b/lang/csharp/src/apache/main/Schema/RecordSchema.cs @@ -17,6 +17,7 @@ */ using System; using System.Collections.Generic; +using System.Linq; using Newtonsoft.Json.Linq; namespace Avro @@ -28,10 +29,26 @@ namespace Avro /// public class RecordSchema : NamedSchema { + private List _fields; + /// /// List of fields in the record /// - public List Fields { get; private set; } + public List Fields + { + get + { + return _fields; + } + + set + { + _fields = SetFieldsPositions(value); + + fieldLookup = CreateFieldMap(_fields); + fieldAliasLookup = CreateFieldMap(_fields, true); + } + } /// /// Number of fields in the record @@ -41,10 +58,109 @@ public class RecordSchema : NamedSchema /// /// Map of field name and Field object for faster field lookups /// - private readonly IDictionary fieldLookup; + private IDictionary fieldLookup; - private readonly IDictionary fieldAliasLookup; - private bool request; + private IDictionary fieldAliasLookup; + private readonly bool request; + + /// + /// Creates a new instance of + /// + /// name of the record schema + /// list of fields for the record + /// type of record schema, either record or error + /// list of aliases for the record name + /// custom properties on this schema + /// documentation for this named schema + public static RecordSchema Create(string name, + List fields, + string space = null, + IEnumerable aliases = null, + PropertyMap customProperties = null, + string doc = null) + { + return new RecordSchema(Type.Record, + new SchemaName(name, space, null, doc), + Aliases.GetSchemaNames(aliases, name, space), + customProperties, + fields, + false, + CreateFieldMap(fields), + CreateFieldMap(fields, true), + new SchemaNames(), + doc); + } + + private static IEnumerable EnumerateSchemasRecursive(Schema schema) + { + yield return schema; + switch (schema.Tag) + { + case Type.Null: + break; + case Type.Boolean: + break; + case Type.Int: + break; + case Type.Long: + break; + case Type.Float: + break; + case Type.Double: + break; + case Type.Bytes: + break; + case Type.String: + break; + case Type.Record: + var recordSchema = (RecordSchema)schema; + recordSchema.Fields.SelectMany(f => EnumerateSchemasRecursive(f.Schema)); + break; + case Type.Enumeration: + break; + case Type.Array: + var arraySchema = (ArraySchema)schema; + EnumerateSchemasRecursive(arraySchema.ItemSchema); + break; + case Type.Map: + var mapSchema = (MapSchema)schema; + EnumerateSchemasRecursive(mapSchema.ValueSchema); + break; + case Type.Union: + var unionSchema = (UnionSchema)schema; + foreach (var innerSchema in unionSchema.Schemas) + { + EnumerateSchemasRecursive(innerSchema); + } + break; + case Type.Fixed: + break; + case Type.Error: + break; + case Type.Logical: + break; + } + } + + private static IDictionary CreateFieldMap(List fields, bool includeAliases = false) + { + var map = new Dictionary(); + if (fields != null) + { + foreach (Field field in fields) + { + addToFieldMap(map, field.Name, field); + + if (includeAliases && field.Aliases != null) + { + foreach (var alias in field.Aliases) + addToFieldMap(map, alias, field); + } + } + } + + return map; + } /// /// Static function to return new instance of the record schema @@ -99,8 +215,10 @@ internal static RecordSchema NewInstance(Type type, JToken jtok, PropertyMap pro if (null != field.Aliases) // add aliases to field lookup map so reader function will find it when writer field name appears only as an alias on the reader field foreach (string alias in field.Aliases) addToFieldMap(fieldAliasMap, alias, field); + + result._fields = fields; } - catch (SchemaParseException e) + catch (AvroException e) { throw new SchemaParseException($"{e.Message} at '{jfield.Path}'", e); } @@ -121,7 +239,7 @@ internal static RecordSchema NewInstance(Type type, JToken jtok, PropertyMap pro /// map of field aliases and field objects /// list of named schema already read /// documentation for this named schema - private RecordSchema(Type type, SchemaName name, IList aliases, PropertyMap props, + private RecordSchema(Type type, SchemaName name, IList aliases, PropertyMap props, List fields, bool request, IDictionary fieldMap, IDictionary fieldAliasMap, SchemaNames names, string doc) : base(type, name, aliases, props, names, doc) @@ -149,7 +267,7 @@ private static Field createField(JToken jfield, int pos, SchemaNames names, stri var jorder = JsonHelper.GetOptionalString(jfield, "order"); Field.SortOrder sortorder = Field.SortOrder.ignore; if (null != jorder) - sortorder = (Field.SortOrder) Enum.Parse(typeof(Field.SortOrder), jorder); + sortorder = (Field.SortOrder)Enum.Parse(typeof(Field.SortOrder), jorder); var aliases = Field.GetAliases(jfield); var props = Schema.GetProperties(jfield); @@ -165,10 +283,20 @@ private static Field createField(JToken jfield, int pos, SchemaNames names, stri private static void addToFieldMap(Dictionary map, string name, Field field) { if (map.ContainsKey(name)) - throw new SchemaParseException("field or alias " + name + " is a duplicate name"); + throw new AvroException("field or alias " + name + " is a duplicate name"); map.Add(name, field); } + /// + /// Clones the fields with updated positions. Updates the positions according to the order of the fields in the list. + /// + /// List of fields + /// New list of cloned fields with updated positions + private List SetFieldsPositions(List fields) + { + return fields.Select((field, i) => field.ChangePosition(i)).ToList(); + } + /// /// Returns the field with the given name. /// @@ -354,9 +482,9 @@ public RecordSchemaPair(RecordSchema first, RecordSchema second) * we can detect it. * * The infinite loop happens in ToString(), Equals() and GetHashCode() methods. - * Though it does not happen for CanRead() because of the current implemenation of UnionSchema's can read, - * it could potenitally happen. - * We do a linear seach for the marker as we don't expect the list to be very long. + * Though it does not happen for CanRead() because of the current implementation of UnionSchema's can read, + * it could potentially happen. + * We do a linear search for the marker as we don't expect the list to be very long. */ private T protect(Function bypass, Function main, RecordSchema that) { diff --git a/lang/csharp/src/apache/main/Schema/Schema.cs b/lang/csharp/src/apache/main/Schema/Schema.cs index 94b96dbcadd..1910fd46cb1 100644 --- a/lang/csharp/src/apache/main/Schema/Schema.cs +++ b/lang/csharp/src/apache/main/Schema/Schema.cs @@ -166,7 +166,7 @@ internal static Schema ParseJson(JToken jtok, SchemaNames names, string encspace if (null != ps) return ps; NamedSchema schema = null; - if (names.TryGetValue(value, null, encspace, out schema)) return schema; + if (names.TryGetValue(value, null, encspace, null, out schema)) return schema; throw new SchemaParseException($"Undefined name: {value} at '{jtok.Path}'"); } @@ -196,14 +196,26 @@ internal static Schema ParseJson(JToken jtok, SchemaNames names, string encspace return LogicalSchema.NewInstance(jtok, props, names, encspace); Schema schema = PrimitiveSchema.NewInstance((string)type, props); - if (null != schema) return schema; + if (null != schema) + return schema; return NamedSchema.NewInstance(jo, props, names, encspace); } else if (jtype.Type == JTokenType.Array) return UnionSchema.NewInstance(jtype as JArray, props, names, encspace); - else if (jtype.Type == JTokenType.Object && null != jo["logicalType"]) // logical type based on a complex type - return LogicalSchema.NewInstance(jtok, props, names, encspace); + else if (jtype.Type == JTokenType.Object) + { + if (null != jo["logicalType"]) // logical type based on a complex type + { + return LogicalSchema.NewInstance(jtok, props, names, encspace); + } + + var schema = ParseJson(jtype, names, encspace); // primitive schemas are allowed to have additional metadata properties + if (schema is PrimitiveSchema) + { + return schema; + } + } } throw new AvroTypeException($"Invalid JSON for schema: {jtok} at '{jtok.Path}'"); } @@ -216,7 +228,20 @@ internal static Schema ParseJson(JToken jtok, SchemaNames names, string encspace public static Schema Parse(string json) { if (string.IsNullOrEmpty(json)) throw new ArgumentNullException(nameof(json), "json cannot be null."); - return Parse(json.Trim(), new SchemaNames(), null); // standalone schema, so no enclosing namespace + return ParseInternal(json.Trim(), new SchemaNames(), null); // standalone schema, so no enclosing namespace + } + + /// + /// Parses a JSON string to create a new schema object + /// + /// JSON string + /// list of named schemas already read + /// enclosing namespace of the schema + /// new Schema object + public static Schema Parse(string json, SchemaNames names, string encspace = null) + { + if (string.IsNullOrEmpty(json)) throw new ArgumentNullException(nameof(json), "json cannot be null."); + return ParseInternal(json.Trim(), names, encspace); // standalone schema, so no enclosing namespace } /// @@ -226,7 +251,7 @@ public static Schema Parse(string json) /// list of named schemas already read /// enclosing namespace of the schema /// new Schema object - internal static Schema Parse(string json, SchemaNames names, string encspace) + internal static Schema ParseInternal(string json, SchemaNames names, string encspace) { Schema sc = PrimitiveSchema.NewInstance(json); if (null != sc) return sc; @@ -369,5 +394,91 @@ protected static int getHashCode(object obj) { return obj == null ? 0 : obj.GetHashCode(); } + + /// + /// Parses the Schema.Type from a string. + /// + /// The type to convert. + /// if set to true [remove quotes]. + /// A Schema.Type unless it could not parse then null + /// + /// usage ParseType("string") returns Schema.Type.String + /// + public static Schema.Type? ParseType(string type, bool removeQuotes = false) + { + string newValue = removeQuotes ? RemoveQuotes(type) : type; + + switch (newValue) + { + case "null": + return Schema.Type.Null; + + case "boolean": + return Schema.Type.Boolean; + + case "int": + return Schema.Type.Int; + + case "long": + return Schema.Type.Long; + + case "float": + return Schema.Type.Float; + + case "double": + return Schema.Type.Double; + + case "bytes": + return Schema.Type.Bytes; + + case "string": + return Schema.Type.String; + + case "record": + return Schema.Type.Record; + + case "enumeration": + return Schema.Type.Enumeration; + + case "array": + return Schema.Type.Array; + + case "map": + return Schema.Type.Map; + + case "union": + return Schema.Type.Union; + + case "fixed": + return Schema.Type.Fixed; + + case "error": + return Schema.Type.Error; + + case "logical": + return Schema.Type.Logical; + + default: + return null; + } + } + + /// + /// Removes the quotes from the first position and last position of the string. + /// + /// The value. + /// + /// If string has a quote at the beginning and the end it removes them, + /// otherwise it returns the original string + /// + private static string RemoveQuotes(string value) + { + if(value.StartsWith("\"") && value.EndsWith("\"")) + { + return value.Substring(1, value.Length - 2); + } + + return value; + } } } diff --git a/lang/csharp/src/apache/main/Schema/SchemaName.cs b/lang/csharp/src/apache/main/Schema/SchemaName.cs index ced24ee70fe..7716d7a55ff 100644 --- a/lang/csharp/src/apache/main/Schema/SchemaName.cs +++ b/lang/csharp/src/apache/main/Schema/SchemaName.cs @@ -21,7 +21,7 @@ namespace Avro { /// - /// Class to store schema name, namespace and enclosing namespace + /// Class to store schema name, namespace, enclosing namespace and documentation /// public class SchemaName { @@ -43,6 +43,11 @@ public class SchemaName /// public String EncSpace { get; private set; } + /// + /// Documentation for the schema + /// + public String Documentation { get; private set; } + /// /// Namespace.Name of the schema /// @@ -59,29 +64,30 @@ public class SchemaName /// name of the schema /// namespace of the schema /// enclosing namespace of the schema - public SchemaName(String name, String space, String encspace) + /// documentation of the schema + public SchemaName(string name, string space, string encspace, string documentation) { if (name == null) { // anonymous - this.Name = this.Space = null; - this.EncSpace = encspace; // need to save enclosing namespace for anonymous types, so named types within the anonymous type can be resolved + Name = Space = null; + EncSpace = encspace; // need to save enclosing namespace for anonymous types, so named types within the anonymous type can be resolved } -#pragma warning disable CA1307 // Specify StringComparison else if (!name.Contains(".")) -#pragma warning restore CA1307 // Specify StringComparison { // unqualified name - this.Space = space; // use default space - this.Name = name; - this.EncSpace = encspace; + Space = space; // use default space + Name = name; + EncSpace = encspace; } else { string[] parts = name.Split('.'); - this.Space = string.Join(".", parts, 0, parts.Length - 1); - this.Name = parts[parts.Length - 1]; - this.EncSpace = encspace; + Space = string.Join(".", parts, 0, parts.Length - 1); + Name = parts[parts.Length - 1]; + EncSpace = encspace; } - fullName = string.IsNullOrEmpty(Namespace) ? this.Name : Namespace + "." + this.Name; + + Documentation = documentation; + fullName = string.IsNullOrEmpty(Namespace) ? Name : Namespace + "." + Name; } /// @@ -104,6 +110,7 @@ internal void WriteJson(Newtonsoft.Json.JsonTextWriter writer, SchemaNames names if (null != this.Name) // write only if not anonymous { JsonHelper.writeIfNotNullOrEmpty(writer, "name", this.Name); + JsonHelper.writeIfNotNull(writer, "doc", this.Documentation); if (!String.IsNullOrEmpty(this.Space)) JsonHelper.writeIfNotNullOrEmpty(writer, "namespace", this.Space); else if (!String.IsNullOrEmpty(this.EncSpace)) // need to put enclosing name space for code generated classes @@ -141,9 +148,7 @@ private static bool areEqual(object obj1, object obj2) /// public override int GetHashCode() { -#pragma warning disable CA1307 // Specify StringComparison return string.IsNullOrEmpty(Fullname) ? 0 : 29 * Fullname.GetHashCode(); -#pragma warning restore CA1307 // Specify StringComparison } } @@ -210,11 +215,12 @@ public bool Add(NamedSchema schema) /// name of the schema /// namespace of the schema /// enclosing namespace of the schema + /// documentation for the schema /// schema object found /// true if name is found in the map, false otherwise - public bool TryGetValue(string name, string space, string encspace, out NamedSchema schema) + public bool TryGetValue(string name, string space, string encspace, string documentation, out NamedSchema schema) { - SchemaName schemaname = new SchemaName(name, space, encspace); + SchemaName schemaname = new SchemaName(name, space, encspace, documentation); return Names.TryGetValue(schemaname, out schema); } diff --git a/lang/csharp/src/apache/main/Schema/SchemaNormalization.cs b/lang/csharp/src/apache/main/Schema/SchemaNormalization.cs index 3b12d04ec52..d6c5a45cf12 100644 --- a/lang/csharp/src/apache/main/Schema/SchemaNormalization.cs +++ b/lang/csharp/src/apache/main/Schema/SchemaNormalization.cs @@ -24,13 +24,13 @@ namespace Avro { /// - /// Collection of static methods for generating the cannonical form of schemas. + /// Collection of static methods for generating the canonical form of schemas. /// public static class SchemaNormalization { /// /// Obsolete: This will be removed from the public API in a future version. - /// This should be a private const field, similar to the Java implementation. It appears + /// This should be a private constant field, similar to the Java implementation. It appears /// that this was originally exposed for unit tests. Unit tests should hard-code this value /// rather than access it here. /// @@ -71,7 +71,7 @@ public static string ToParsingForm(Schema s) /// not recognized and an /// ArgumentException is thrown /// - /// Recommended Avro practice dictiates that + /// Recommended Avro practice dictates that /// "CRC-64-AVRO" is used for 64-bit fingerprints, /// "MD5" is used for 128-bit fingerprints, and /// "SHA-256" is used for 256-bit fingerprints. diff --git a/lang/csharp/src/apache/main/Schema/UnionSchema.cs b/lang/csharp/src/apache/main/Schema/UnionSchema.cs index 0ffb5e091f7..af9ba758363 100644 --- a/lang/csharp/src/apache/main/Schema/UnionSchema.cs +++ b/lang/csharp/src/apache/main/Schema/UnionSchema.cs @@ -17,9 +17,8 @@ */ using System; using System.Collections.Generic; -using System.Text; +using System.Linq; using Newtonsoft.Json.Linq; -using Newtonsoft.Json; namespace Avro { @@ -67,15 +66,28 @@ internal static UnionSchema NewInstance(JArray jarr, PropertyMap props, SchemaNa return new UnionSchema(schemas, props); } + /// + /// Creates a new + /// + /// The union schemas + /// Dictionary that provides access to custom properties + /// New + public static UnionSchema Create(List schemas, PropertyMap customProperties = null) + { + return new UnionSchema(schemas, customProperties); + } + /// /// Contructor for union schema /// /// - /// dictionary that provides access to custom properties - private UnionSchema(List schemas, PropertyMap props) : base(Type.Union, props) + /// dictionary that provides access to custom properties + private UnionSchema(List schemas, PropertyMap customProperties) + : base(Type.Union, customProperties) { if (schemas == null) throw new ArgumentNullException(nameof(schemas)); + VerifyChildSchemas(schemas); this.Schemas = schemas; } @@ -115,8 +127,21 @@ public int MatchingBranch(Schema s) { if (s is UnionSchema) throw new AvroException("Cannot find a match against union schema"); // Try exact match. - //for (int i = 0; i < Count; i++) if (Schemas[i].Equals(s)) return i; // removed this for performance's sake - for (int i = 0; i < Count; i++) if (Schemas[i].CanRead(s)) return i; + // CanRead might find a compatible schema which can read. e.g. double and long + for (int i = 0; i < Count; i++) + { + if (Schemas[i].Equals(s)) + { + return i; + } + } + for (int i = 0; i < Count; i++) + { + if (Schemas[i].CanRead(s)) + { + return i; + } + } return -1; } @@ -161,5 +186,20 @@ public override int GetHashCode() result += getHashCode(Props); return result; } + + private void VerifyChildSchemas(List schemas) + { + if (schemas.Any(schema => schema.Tag == Type.Union)) + { + throw new ArgumentException("Unions may not immediately contain other unions", nameof(schemas)); + } + + IGrouping duplicateType = schemas.GroupBy(schema => schema.Fullname).FirstOrDefault(x => x.Count() > 1); + + if (duplicateType != null) + { + throw new ArgumentException($"Duplicate type in union: {duplicateType.Key}"); + } + } } } diff --git a/lang/csharp/src/apache/main/Specific/ObjectCreator.cs b/lang/csharp/src/apache/main/Specific/ObjectCreator.cs index e69a490283d..073b107958a 100644 --- a/lang/csharp/src/apache/main/Specific/ObjectCreator.cs +++ b/lang/csharp/src/apache/main/Specific/ObjectCreator.cs @@ -58,13 +58,6 @@ public sealed class ObjectCreator private readonly Assembly entryAssembly; private readonly bool diffAssembly; - /// - /// Obsolete: This will be removed from the public API in a future version. - /// - /// Obsolete - [Obsolete("This will be removed from the public API in a future version.")] - public delegate object CtorDelegate(); - /// /// Initializes a new instance of the class. /// @@ -78,57 +71,6 @@ public ObjectCreator() diffAssembly = entryAssembly != null && execAssembly != entryAssembly; } -#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member -#pragma warning disable CA1034 // Nested types should not be visible -#pragma warning disable SA1600 // Elements should be documented - /// - /// Obsolete: This will be removed from the public API in a future version. - /// - [Obsolete("This will be removed from the public API in a future version.")] - public struct NameCtorKey : IEquatable - { - public string name { get; private set; } - public Schema.Type type { get; private set; } - public NameCtorKey(string value1, Schema.Type value2) - : this() - { - name = value1; - type = value2; - } - public bool Equals(NameCtorKey other) - { - return Equals(other.name, name) && other.type == type; - } - public override bool Equals(object obj) - { - if (ReferenceEquals(null, obj)) - return false; - if (obj.GetType() != typeof(NameCtorKey)) - return false; - return Equals((NameCtorKey)obj); - } - public override int GetHashCode() - { - unchecked - { -#pragma warning disable CA1307 // Specify StringComparison - return ((name != null ? name.GetHashCode() : 0) * 397) ^ type.GetHashCode(); -#pragma warning restore CA1307 // Specify StringComparison - } - } - public static bool operator ==(NameCtorKey left, NameCtorKey right) - { - return left.Equals(right); - } - public static bool operator !=(NameCtorKey left, NameCtorKey right) - { - return !left.Equals(right); - } - } -#pragma warning restore SA1600 // Elements should be documented -#pragma warning restore CA1034 // Nested types should not be visible -#pragma warning restore CS1591 // Missing XML comment for publicly visible type or member - /// /// Find the type with the given name /// diff --git a/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs b/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs index bfc88847176..c823253692d 100644 --- a/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs +++ b/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs @@ -176,6 +176,7 @@ public void WriteArrayValues(object array, WriteItem valueWriter, Encoder encode var list = (IList) array; for (int i = 0; i < list.Count; i++ ) { + encoder.StartItem(); valueWriter(list[i], encoder); } } diff --git a/lang/csharp/src/apache/main/Specific/SpecificReader.cs b/lang/csharp/src/apache/main/Specific/SpecificReader.cs index a8e8e5970bf..1019fa36ced 100644 --- a/lang/csharp/src/apache/main/Specific/SpecificReader.cs +++ b/lang/csharp/src/apache/main/Specific/SpecificReader.cs @@ -72,7 +72,7 @@ public SpecificReader(SpecificDefaultReader reader) /// Generic read function /// /// object to store data read - /// decorder to use for reading data + /// decoder to use for reading data /// public T Read(T reuse, Decoder dec) { @@ -130,20 +130,22 @@ protected override object ReadRecord(object reuse, RecordSchema writerSchema, Sc } } - var defaultStream = new MemoryStream(); - var defaultEncoder = new BinaryEncoder(defaultStream); - var defaultDecoder = new BinaryDecoder(defaultStream); - foreach (Field rf in rs) + using (var defaultStream = new MemoryStream()) { - if (writerSchema.Contains(rf.Name)) continue; + var defaultEncoder = new BinaryEncoder(defaultStream); + var defaultDecoder = new BinaryDecoder(defaultStream); + foreach (Field rf in rs) + { + if (writerSchema.Contains(rf.Name)) continue; - defaultStream.Position = 0; // reset for writing - Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); - defaultStream.Flush(); - defaultStream.Position = 0; // reset for reading + defaultStream.Position = 0; // reset for writing + Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); + defaultStream.Flush(); + defaultStream.Position = 0; // reset for reading - obj = rec.Get(rf.Pos); - rec.Put(rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + obj = rec.Get(rf.Pos); + rec.Put(rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + } } return rec; @@ -155,10 +157,10 @@ protected override object ReadRecord(object reuse, RecordSchema writerSchema, Sc /// /// If appropriate, uses this object instead of creating a new one. /// The FixedSchema the writer used during serialization. - /// The schema that the readr uses. Must be a FixedSchema with the same + /// The schema that the reader uses. Must be a FixedSchema with the same /// size as the writerSchema. /// The decoder for deserialization. - /// The deserilized object. + /// The deserialized object. protected override object ReadFixed(object reuse, FixedSchema writerSchema, Schema readerSchema, Decoder d) { FixedSchema rs = readerSchema as FixedSchema; @@ -220,7 +222,7 @@ protected override object ReadArray(object reuse, ArraySchema writerSchema, Sche } /// - /// Deserialized an avro map. The default implemenation creats a new map using CreateMap() and then + /// Deserialized an avro map. The default implementation creates a new map using CreateMap() and then /// adds elements to the map using AddMapEntry(). /// /// If appropriate, use this instead of creating a new map object. diff --git a/lang/csharp/src/apache/main/Specific/SpecificWriter.cs b/lang/csharp/src/apache/main/Specific/SpecificWriter.cs index b595241f39a..53d6407e947 100644 --- a/lang/csharp/src/apache/main/Specific/SpecificWriter.cs +++ b/lang/csharp/src/apache/main/Specific/SpecificWriter.cs @@ -149,7 +149,7 @@ protected override void WriteMap(MapSchema schema, object value, Encoder encoder if (map == null) throw new AvroTypeException("Map does not implement non-generic IDictionary"); - encoder.WriteArrayStart(); + encoder.WriteMapStart(); encoder.SetItemCount(map.Count); foreach (System.Collections.DictionaryEntry de in map) { diff --git a/lang/csharp/src/apache/main/Util/LocalTimestampMicrosecond.cs b/lang/csharp/src/apache/main/Util/LocalTimestampMicrosecond.cs new file mode 100644 index 00000000000..36014c97aef --- /dev/null +++ b/lang/csharp/src/apache/main/Util/LocalTimestampMicrosecond.cs @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Avro.Util +{ + /// + /// The 'local-timestamp-micros' logical type. + /// + public class LocalTimestampMicrosecond : LogicalUnixEpochType + { + /// + /// The logical type name for LocalTimestampMicrosecond. + /// + public static readonly string LogicalTypeName = "local-timestamp-micros"; + + /// + /// Initializes a new LocalTimestampMicrosecond logical type. + /// + public LocalTimestampMicrosecond() + : base(LogicalTypeName) + { + } + + /// + public override void ValidateSchema(LogicalSchema schema) + { + if (Schema.Type.Long != schema.BaseSchema.Tag) + { + throw new AvroTypeException("'local-timestamp-micros' can only be used with an underlying long type"); + } + } + + /// + public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + DateTime date = ((DateTime)logicalValue).ToUniversalTime(); + return (date - UnixEpochDateTime).Ticks / TicksPerMicrosecond; + } + + /// + public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) + { + return UnixEpochDateTime.AddTicks((long)baseValue * TicksPerMicrosecond).ToLocalTime(); + } + } +} diff --git a/lang/csharp/src/apache/main/Util/LocalTimestampMillisecond.cs b/lang/csharp/src/apache/main/Util/LocalTimestampMillisecond.cs new file mode 100644 index 00000000000..4ae86fd087b --- /dev/null +++ b/lang/csharp/src/apache/main/Util/LocalTimestampMillisecond.cs @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; + +namespace Avro.Util +{ + /// + /// The 'local-timestamp-millis' logical type. + /// + public class LocalTimestampMillisecond : LogicalUnixEpochType + { + /// + /// The logical type name for LocalTimestampMillisecond. + /// + public static readonly string LogicalTypeName = "local-timestamp-millis"; + + /// + /// Initializes a new LocalTimestampMillisecond logical type. + /// + public LocalTimestampMillisecond() + : base(LogicalTypeName) + { + } + + /// + public override void ValidateSchema(LogicalSchema schema) + { + if (Schema.Type.Long != schema.BaseSchema.Tag) + { + throw new AvroTypeException("'local-timestamp-millis' can only be used with an underlying long type"); + } + } + + /// + public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + DateTime date = ((DateTime)logicalValue).ToUniversalTime(); + return (long)(date - UnixEpochDateTime).TotalMilliseconds; + } + + /// + public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) + { + return UnixEpochDateTime.AddMilliseconds((long)baseValue).ToLocalTime(); + } + } +} diff --git a/lang/csharp/src/apache/main/Util/LogicalTypeFactory.cs b/lang/csharp/src/apache/main/Util/LogicalTypeFactory.cs index e7fa3155641..e92d48ff7aa 100644 --- a/lang/csharp/src/apache/main/Util/LogicalTypeFactory.cs +++ b/lang/csharp/src/apache/main/Util/LogicalTypeFactory.cs @@ -39,6 +39,8 @@ private LogicalTypeFactory() { { Decimal.LogicalTypeName, new Decimal() }, { Date.LogicalTypeName, new Date() }, + { LocalTimestampMillisecond.LogicalTypeName, new LocalTimestampMillisecond() }, + { LocalTimestampMicrosecond.LogicalTypeName, new LocalTimestampMicrosecond() }, { TimeMillisecond.LogicalTypeName, new TimeMillisecond() }, { TimeMicrosecond.LogicalTypeName, new TimeMicrosecond() }, { TimestampMillisecond.LogicalTypeName, new TimestampMillisecond() }, @@ -65,22 +67,22 @@ public void Register(LogicalType logicalType) /// A . public LogicalType GetFromLogicalSchema(LogicalSchema schema, bool ignoreInvalidOrUnknown = false) { - try - { - if (!_logicalTypes.TryGetValue(schema.LogicalTypeName, out LogicalType logicalType)) - throw new AvroTypeException("Logical type '" + schema.LogicalTypeName + "' is not supported."); + LogicalType logicalType = null; + if (_logicalTypes.TryGetValue(schema.LogicalTypeName, out logicalType)) + { logicalType.ValidateSchema(schema); - - return logicalType; } - catch (AvroTypeException) + else if (ignoreInvalidOrUnknown) + { + logicalType = new UnknownLogicalType(schema); + } + else { - if (!ignoreInvalidOrUnknown) - throw; + throw new AvroTypeException("Logical type '" + schema.LogicalTypeName + "' is not supported."); } - return null; + return logicalType; } } } diff --git a/lang/csharp/src/apache/main/Util/LogicalUnixEpochType.cs b/lang/csharp/src/apache/main/Util/LogicalUnixEpochType.cs index f4187d070ca..f88b733fd35 100644 --- a/lang/csharp/src/apache/main/Util/LogicalUnixEpochType.cs +++ b/lang/csharp/src/apache/main/Util/LogicalUnixEpochType.cs @@ -31,6 +31,11 @@ public abstract class LogicalUnixEpochType : LogicalType /// protected static readonly DateTime UnixEpochDateTime = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); + /// + /// Number of ticks per microsecond. + /// + protected const long TicksPerMicrosecond = TimeSpan.TicksPerMillisecond / 1000; + /// /// Initializes the base logical type. /// diff --git a/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs b/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs index f561d6ff8d6..c3226f625ef 100644 --- a/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs +++ b/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs @@ -25,8 +25,8 @@ namespace Avro.Util /// public class TimeMicrosecond : LogicalUnixEpochType { - private static readonly TimeSpan _maxTime = new TimeSpan(23, 59, 59); - + private static readonly TimeSpan _exclusiveUpperBound = TimeSpan.FromDays(1); + /// /// The logical type name for TimeMicrosecond. /// @@ -50,17 +50,29 @@ public override object ConvertToBaseValue(object logicalValue, LogicalSchema sch { var time = (TimeSpan)logicalValue; - if (time > _maxTime) - throw new ArgumentOutOfRangeException(nameof(logicalValue), "A 'time-micros' value can only have the range '00:00:00' to '23:59:59'."); + ThrowIfOutOfRange(time, nameof(logicalValue)); - return (long)(time - UnixEpochDateTime.TimeOfDay).TotalMilliseconds * 1000; + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00'. This could be 'return time.Ticks / TicksPerMicrosecond'; + return (time - UnixEpochDateTime.TimeOfDay).Ticks / TicksPerMicrosecond; } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var noMs = (long)baseValue / 1000; - return UnixEpochDateTime.TimeOfDay.Add(TimeSpan.FromMilliseconds(noMs)); + var time = TimeSpan.FromTicks((long)baseValue * TicksPerMicrosecond); + + ThrowIfOutOfRange(time, nameof(baseValue)); + + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00', so the Add is meaningless. This could be 'return time;' + return UnixEpochDateTime.TimeOfDay.Add(time); + } + + private static void ThrowIfOutOfRange(TimeSpan time, string paramName) + { + if (time.Ticks < 0 || time >= _exclusiveUpperBound) + { + throw new ArgumentOutOfRangeException(paramName, $"A '{LogicalTypeName}' value must be at least '{TimeSpan.Zero}' and less than '{_exclusiveUpperBound}'."); + } } } } diff --git a/lang/csharp/src/apache/main/Util/TimeMillisecond.cs b/lang/csharp/src/apache/main/Util/TimeMillisecond.cs index 9008fa38abf..d3132560063 100644 --- a/lang/csharp/src/apache/main/Util/TimeMillisecond.cs +++ b/lang/csharp/src/apache/main/Util/TimeMillisecond.cs @@ -25,7 +25,7 @@ namespace Avro.Util /// public class TimeMillisecond : LogicalUnixEpochType { - private static readonly TimeSpan _maxTime = new TimeSpan(23, 59, 59); + private static readonly TimeSpan _exclusiveUpperBound = TimeSpan.FromDays(1); /// /// The logical type name for TimeMillisecond. @@ -50,17 +50,29 @@ public override object ConvertToBaseValue(object logicalValue, LogicalSchema sch { var time = (TimeSpan)logicalValue; - if (time > _maxTime) - throw new ArgumentOutOfRangeException(nameof(logicalValue), "A 'time-millis' value can only have the range '00:00:00' to '23:59:59'."); + ThrowIfOutOfRange(time, nameof(logicalValue)); + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00'. This could be 'return time.TotalMilliseconds; return (int)(time - UnixEpochDateTime.TimeOfDay).TotalMilliseconds; } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var noMs = (int)baseValue; - return UnixEpochDateTime.TimeOfDay.Add(TimeSpan.FromMilliseconds(noMs)); + var time = TimeSpan.FromMilliseconds((int)baseValue); + + ThrowIfOutOfRange(time, nameof(baseValue)); + + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00'. This could be 'return time;' + return UnixEpochDateTime.TimeOfDay.Add(time); + } + + private static void ThrowIfOutOfRange(TimeSpan time, string paramName) + { + if (time.Ticks < 0 || time >= _exclusiveUpperBound) + { + throw new ArgumentOutOfRangeException(paramName, $"A '{LogicalTypeName}' value must be at least '{TimeSpan.Zero}' and less than '{_exclusiveUpperBound}'."); + } } } } diff --git a/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs b/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs index 54a421a5d71..4d8b1cc6cad 100644 --- a/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs +++ b/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs @@ -47,14 +47,13 @@ public override void ValidateSchema(LogicalSchema schema) public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) { var date = ((DateTime)logicalValue).ToUniversalTime(); - return (long)((date - UnixEpochDateTime).TotalMilliseconds * 1000); + return (date - UnixEpochDateTime).Ticks / TicksPerMicrosecond; } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var noMs = (long)baseValue / 1000; - return UnixEpochDateTime.AddMilliseconds(noMs); + return UnixEpochDateTime.AddTicks((long)baseValue * TicksPerMicrosecond); } } } diff --git a/lang/csharp/src/apache/main/Util/UnknownLogicalType.cs b/lang/csharp/src/apache/main/Util/UnknownLogicalType.cs new file mode 100644 index 00000000000..5cc03fe2895 --- /dev/null +++ b/lang/csharp/src/apache/main/Util/UnknownLogicalType.cs @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Avro.Util +{ + /// + /// Class UnknownLogicalType. + /// Implements the + /// + /// + public class UnknownLogicalType : LogicalType + { + /// + /// Gets the schema. + /// + /// The schema. + public LogicalSchema Schema { get; } + + /// + /// Initializes a new instance of the class. + /// + /// The schema. + public UnknownLogicalType(LogicalSchema schema) : base(schema.LogicalTypeName) + { + this.Schema = schema; + } + + /// + /// Converts a logical value to an instance of its base type. + /// + /// The logical value to convert. + /// The schema that represents the target of the conversion. + /// An object representing the encoded value of the base type. + public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + return logicalValue; + } + + /// + /// Converts a base value to an instance of the logical type. + /// + /// The base value to convert. + /// The schema that represents the target of the conversion. + /// An object representing the encoded value of the logical type. + public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) + { + switch (schema.Name) + { + case @"string": + return (System.String)baseValue; + case @"boolean": + return (System.Boolean)baseValue; + case @"int": + return (System.Int32)baseValue; + case @"long": + return (System.Int64)baseValue; + case @"float": + return (System.Single)baseValue; + case @"double": + return (System.Double)baseValue; + case @"bytes": + return (System.Byte[])baseValue; + default: + return baseValue; + } + } + + /// + /// Retrieve the .NET type that is represented by the logical type implementation. + /// + /// A flag indicating whether it should be nullible. + /// Type. + public override Type GetCSharpType(bool nullible) + { + // handle all Primitive Types + switch (this.Schema.BaseSchema.Name) + { + case @"string": + return typeof(System.String); + case @"boolean": + return nullible ? typeof(System.Boolean?) : typeof(System.Boolean); + case @"int": + return nullible ? typeof(System.Int32?) : typeof(System.Int32); + case @"long": + return nullible ? typeof(System.Int64?) : typeof(System.Int64); + case @"float": + return nullible ? typeof(System.Single?) : typeof(System.Single); + case @"double": + return nullible ? typeof(System.Double?) : typeof(System.Double); + case @"bytes": + return typeof(System.Byte[]); + default: + return typeof(System.Object); + } + } + + /// + /// Determines if a given object is an instance of the logical type. + /// + /// The logical value to test. + /// true if [is instance of logical type] [the specified logical value]; otherwise, false. + public override bool IsInstanceOfLogicalType(object logicalValue) + { + // handle all Primitive Types + switch (this.Schema.BaseSchema.Name) + { + case @"string": + return logicalValue is System.String; + case @"boolean": + return logicalValue is System.Boolean; + case @"int": + return logicalValue is System.Int32; + case @"long": + return logicalValue is System.Int64; + case @"float": + return logicalValue is System.Single; + case @"double": + return logicalValue is System.Double; + case @"bytes": + return logicalValue is System.Byte[]; + default: + return true; + } + } + + } +} diff --git a/lang/csharp/src/apache/msbuild/Avro.msbuild.csproj b/lang/csharp/src/apache/msbuild/Avro.msbuild.csproj index 7ba943cacd7..7f06f64dc65 100644 --- a/lang/csharp/src/apache/msbuild/Avro.msbuild.csproj +++ b/lang/csharp/src/apache/msbuild/Avro.msbuild.csproj @@ -16,11 +16,10 @@ --> - + - netstandard2.0 - net461;netstandard2.0 + $(DefaultLibraryTargetFrameworks) Avro.msbuild Avro.msbuild false @@ -33,17 +32,11 @@ $(NoWarn);NU5104 - + - - - - - - diff --git a/lang/csharp/src/apache/perf/Avro.perf.csproj b/lang/csharp/src/apache/perf/Avro.perf.csproj index 00b94aa298e..cae41aaa6e7 100644 --- a/lang/csharp/src/apache/perf/Avro.perf.csproj +++ b/lang/csharp/src/apache/perf/Avro.perf.csproj @@ -16,10 +16,11 @@ --> + + Exe - net5.0 - net461;net5.0 + $(DefaultExeTargetFrameworks) Avro.perf Avro.perf false diff --git a/lang/csharp/src/apache/test/Avro.test.csproj b/lang/csharp/src/apache/test/Avro.test.csproj index ff2cfa09fef..3ba3a0ffa89 100644 --- a/lang/csharp/src/apache/test/Avro.test.csproj +++ b/lang/csharp/src/apache/test/Avro.test.csproj @@ -16,15 +16,16 @@ --> - + - netcoreapp2.1;netcoreapp3.1;net5.0 - net461;netcoreapp2.1;netcoreapp3.1;net5.0 + $(DefaultUnitTestTargetFrameworks) Avro.test Avro.test false false + True + ..\..\..\Avro.snk @@ -33,18 +34,32 @@ + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + - - + + - + + + + diff --git a/lang/csharp/src/apache/test/AvroDecimalTest.cs b/lang/csharp/src/apache/test/AvroDecimalTest.cs index e10210bf31c..c6d0d3a807e 100644 --- a/lang/csharp/src/apache/test/AvroDecimalTest.cs +++ b/lang/csharp/src/apache/test/AvroDecimalTest.cs @@ -15,6 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +using System.Globalization; using NUnit.Framework; namespace Avro.test @@ -22,25 +24,72 @@ namespace Avro.test [TestFixture] class AvroDecimalTest { - [TestCase(1)] - [TestCase(1000)] - [TestCase(10.10)] - [TestCase(0)] - [TestCase(0.1)] - [TestCase(0.01)] - [TestCase(-1)] - [TestCase(-1000)] - [TestCase(-10.10)] - [TestCase(-0.1)] - [TestCase(-0.01)] - public void TestAvroDecimalToString(decimal value) + //Use strings as parameters as otherwise doubles will be used intermediately by C# and scale will be lost in this process + [TestCase("1")] + [TestCase("1000")] + [TestCase("10.10")] + [TestCase("0")] + [TestCase("0.1")] + [TestCase("0.01")] + [TestCase("-1")] + [TestCase("-1000")] + [TestCase("-10.10")] + [TestCase("-0.1")] + [TestCase("-0.01")] + public void TestAvroDecimalToString(string value) + { + var valueDecimal = decimal.Parse(value, CultureInfo.InvariantCulture); + var valueString = valueDecimal.ToString(); + + var avroDecimal = new AvroDecimal(valueDecimal); + var avroDecimalString = avroDecimal.ToString(); + + Assert.AreEqual(valueString, avroDecimalString); + } + + [Test] + public void TestHighPrecisionAvroDecimalToString() { + var value = 4.1748330066797328106875724512m; // High precision decimal value var valueString = value.ToString(); - var avroDecimal = new AvroDecimal(value); + var avroDecimal = new AvroDecimal(value); var avroDecimalString = avroDecimal.ToString(); Assert.AreEqual(valueString, avroDecimalString); + + value = -4.1748330066797328106875724512m; // High precision decimal value + valueString = value.ToString(); + + avroDecimal = new AvroDecimal(value); + avroDecimalString = avroDecimal.ToString(); + + Assert.AreEqual(valueString, avroDecimalString); + } + + //Use strings as parameters as otherwise doubles will be used intermediately by C# and scale will be lost in this process + [TestCase("0", "0", ExpectedResult = 0)] + [TestCase("1", "0", ExpectedResult = 1)] + [TestCase("0", "1", ExpectedResult = -1)] + [TestCase("1.0", "1.0", ExpectedResult = 0)] + [TestCase("1.0", "1", ExpectedResult = 0)] + [TestCase("1", "1.0", ExpectedResult = 0)] + [TestCase("1.0", "0", ExpectedResult = 1)] + [TestCase("0", "1.0", ExpectedResult = -1)] + [TestCase("-0.5", "-1.0", ExpectedResult = 1)] + [TestCase("-1.0", "-0.5", ExpectedResult = -1)] + [TestCase("0.1", "0.01", ExpectedResult = 1)] + [TestCase("0.01", "0.1", ExpectedResult = -1)] + [TestCase("-0.1", "-0.01", ExpectedResult = -1)] + [TestCase("-0.01", "-0.1", ExpectedResult = 1)] + public int TestAvroDecimalCompareTo(string left, string right) + { + var leftDecimal = decimal.Parse(left, CultureInfo.InvariantCulture); + var rightDecimal = decimal.Parse(right, CultureInfo.InvariantCulture); + var leftAvroDecimal = new AvroDecimal(leftDecimal); + var rightAvroDecimal = new AvroDecimal(rightDecimal); + + return leftAvroDecimal.CompareTo(rightAvroDecimal); } } } diff --git a/lang/csharp/src/apache/test/AvroGen/AvroGenHelper.cs b/lang/csharp/src/apache/test/AvroGen/AvroGenHelper.cs new file mode 100644 index 00000000000..0ce66020717 --- /dev/null +++ b/lang/csharp/src/apache/test/AvroGen/AvroGenHelper.cs @@ -0,0 +1,297 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Text; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.Emit; +using NUnit.Framework; +using Avro.Specific; + +namespace Avro.Test.AvroGen +{ + class AvroGenToolResult + { + public int ExitCode { get; set; } + public string[] StdOut { get; set; } + public string[] StdErr { get; set; } + } + + class AvroGenHelper + { + public static AvroGenToolResult RunAvroGenTool(params string[] args) + { + // Save stdout and stderr + TextWriter conOut = Console.Out; + TextWriter conErr = Console.Error; + + try + { + AvroGenToolResult result = new AvroGenToolResult(); + StringBuilder strBuilderOut = new StringBuilder(); + StringBuilder strBuilderErr = new StringBuilder(); + + using (StringWriter writerOut = new StringWriter(strBuilderOut)) + using (StringWriter writerErr = new StringWriter(strBuilderErr)) + { + writerOut.NewLine = "\n"; + writerErr.NewLine = "\n"; + + // Overwrite stdout and stderr to be able to capture console output + Console.SetOut(writerOut); + Console.SetError(writerErr); + + result.ExitCode = AvroGenTool.Main(args.ToArray()); + + writerOut.Flush(); + writerErr.Flush(); + + result.StdOut = strBuilderOut.Length == 0 ? Array.Empty() : strBuilderOut.ToString().Split(writerOut.NewLine); + result.StdErr = strBuilderErr.Length == 0 ? Array.Empty() : strBuilderErr.ToString().Split(writerErr.NewLine); + } + + return result; + } + finally + { + // Restore console + Console.SetOut(conOut); + Console.SetError(conErr); + } + } + + public static Assembly CompileCSharpFilesIntoLibrary(IEnumerable sourceFiles, string assemblyName = null, bool loadAssembly = true) + { + // Create random assembly name if not specified + if (assemblyName == null) + assemblyName = Path.GetRandomFileName(); + + // Base path to assemblies .NET assemblies + var assemblyPath = Path.GetDirectoryName(typeof(object).Assembly.Location); + + using (var compilerStream = new MemoryStream()) + { + List assemblies = new List() + { + typeof(object).Assembly.Location, + typeof(Schema).Assembly.Location, + typeof(System.CodeDom.Compiler.GeneratedCodeAttribute).Assembly.Location, + Path.Combine(assemblyPath, "System.Runtime.dll"), + Path.Combine(assemblyPath, "netstandard.dll") + }; + + // Create compiler + CSharpCompilation compilation = CSharpCompilation + .Create(assemblyName) + .WithOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary)) + .AddReferences(assemblies.Select(path => MetadataReference.CreateFromFile(path))) + .AddSyntaxTrees(sourceFiles.Select(sourceFile => + { + string sourceText = System.IO.File.ReadAllText(sourceFile); + return CSharpSyntaxTree.ParseText(sourceText); + })); + + // Compile + EmitResult compilationResult = compilation.Emit(compilerStream); + +#if DEBUG + if (!compilationResult.Success) + { + foreach (Diagnostic diagnostic in compilationResult.Diagnostics) + { + if (diagnostic.IsWarningAsError || diagnostic.Severity == DiagnosticSeverity.Error) + { + TestContext.WriteLine($"{diagnostic.Id} - {diagnostic.GetMessage()} - {diagnostic.Location}"); + } + } + } +#endif + + Assert.That(compilationResult.Success, Is.True); + + if (!loadAssembly) + { + return null; + } + + // Load assembly from stream + compilerStream.Seek(0, SeekOrigin.Begin); + return Assembly.Load(compilerStream.ToArray()); + } + } + + public static string CreateEmptyTemporaryFolder(out string uniqueId, string path = null) + { + // Create unique id + uniqueId = Guid.NewGuid().ToString(); + + // Temporary folder name in working folder or the specified path + string tempFolder = Path.Combine(path ?? TestContext.CurrentContext.WorkDirectory, uniqueId); + + // Create folder + Directory.CreateDirectory(tempFolder); + + // Make sure it is empty + Assert.That(new DirectoryInfo(tempFolder), Is.Empty); + + return tempFolder; + } + + public static Assembly CompileCSharpFilesAndCheckTypes( + string outputDir, + string assemblyName, + IEnumerable typeNamesToCheck = null, + IEnumerable generatedFilesToCheck = null) + { + // Check if all generated files exist + if (generatedFilesToCheck != null) + { + foreach (string generatedFile in generatedFilesToCheck) + { + Assert.That(new FileInfo(Path.Combine(outputDir, generatedFile)), Does.Exist); + } + } + + // Compile into netstandard library and load assembly + Assembly assembly = CompileCSharpFilesIntoLibrary( + new DirectoryInfo(outputDir) + .EnumerateFiles("*.cs", SearchOption.AllDirectories) + .Select(fi => fi.FullName), + assemblyName); + + if (typeNamesToCheck != null) + { + // Check if the compiled code has the same number of types defined as the check list + // Note: Ignore types which are injected by the compiler (System.* and Microsoft.*), e.g. Microsoft.CodeAnalysis.EmbeddedAttribute + Assert.That( + typeNamesToCheck.Count(), + Is.EqualTo( + assembly + .DefinedTypes + .Where(t => + { + return !t.Namespace.StartsWith("Microsoft.", StringComparison.OrdinalIgnoreCase) && + !t.Namespace.StartsWith("System.", StringComparison.OrdinalIgnoreCase); + }) + .Count())); + + // Check if types available in compiled assembly + foreach (string typeName in typeNamesToCheck) + { + Type type = assembly.GetType(typeName); + Assert.That(type, Is.Not.Null); + + // Protocols are abstract and cannot be instantiated + if (typeof(ISpecificProtocol).IsAssignableFrom(type)) + { + Assert.That(type.IsAbstract, Is.True); + + // If directly inherited from ISpecificProtocol, use reflection to read static private field + // holding the protocol. Callback objects are not directly inherited from ISpecificProtocol, + // so private fields in the base class cannot be accessed + if (type.BaseType.Equals(typeof(ISpecificProtocol))) + { + // Use reflection to read static field, holding the protocol + FieldInfo protocolField = type.GetField("protocol", BindingFlags.NonPublic | BindingFlags.Static); + Protocol protocol = protocolField.GetValue(null) as Protocol; + + Assert.That(protocol, Is.Not.Null); + } + } + else + { + Assert.That(type.IsClass || type.IsEnum, Is.True); + + // Instantiate object + object obj = Activator.CreateInstance(type); + Assert.That(obj, Is.Not.Null); + + // If ISpecificRecord, call its member for sanity check + if (obj is ISpecificRecord record) + { + // Read record's schema object + Assert.That(record.Schema, Is.Not.Null); + // Force exception by reading/writing invalid field + Assert.Throws(() => record.Get(-1)); + Assert.Throws(() => record.Put(-1, null)); + } + } + } + } + + return assembly; + } + + public static Assembly TestSchema( + string schema, + IEnumerable typeNamesToCheck = null, + IEnumerable> namespaceMapping = null, + IEnumerable generatedFilesToCheck = null, + bool skipDirectories = false) + { + // Create temp folder + string outputDir = CreateEmptyTemporaryFolder(out string uniqueId); + + try + { + // Save schema + string schemaFileName = Path.Combine(outputDir, $"{uniqueId}.avsc"); + System.IO.File.WriteAllText(schemaFileName, schema); + + // Generate from schema file + Assert.That(AvroGenTool.GenSchema(schemaFileName, outputDir, namespaceMapping ?? new Dictionary(), skipDirectories), Is.EqualTo(0)); + + return CompileCSharpFilesAndCheckTypes(outputDir, uniqueId, typeNamesToCheck, generatedFilesToCheck); + } + finally + { + Directory.Delete(outputDir, true); + } + } + + public static Assembly TestProtocol( + string protocol, + IEnumerable typeNamesToCheck = null, + IEnumerable> namespaceMapping = null, + IEnumerable generatedFilesToCheck = null) + { + // Create temp folder + string outputDir = CreateEmptyTemporaryFolder(out string uniqueId); + + try + { + // Save protocol + string schemaFileName = Path.Combine(outputDir, $"{uniqueId}.avpr"); + System.IO.File.WriteAllText(schemaFileName, protocol); + + // Generate from protocol file + Assert.That(AvroGenTool.GenProtocol(schemaFileName, outputDir, namespaceMapping ?? new Dictionary()), Is.EqualTo(0)); + + return CompileCSharpFilesAndCheckTypes(outputDir, uniqueId, typeNamesToCheck, generatedFilesToCheck); + } + finally + { + Directory.Delete(outputDir, true); + } + } + } +} diff --git a/lang/csharp/src/apache/test/AvroGen/AvroGenProtocolTests.cs b/lang/csharp/src/apache/test/AvroGen/AvroGenProtocolTests.cs new file mode 100644 index 00000000000..b408650369f --- /dev/null +++ b/lang/csharp/src/apache/test/AvroGen/AvroGenProtocolTests.cs @@ -0,0 +1,517 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.Collections.Generic; +using NUnit.Framework; + +namespace Avro.Test.AvroGen +{ + [TestFixture] + + class AvroGenProtocolTests + { + private const string _baseball = @" +{ + ""protocol"" : ""Baseball"", + ""namespace"" : ""avro.examples.baseball"", + ""doc"" : ""Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\""License\""); you may not use this file except in compliance\nwith the License. You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \""AS IS\"" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License."", + ""types"" : [ { + ""type"" : ""enum"", + ""name"" : ""Position"", + ""symbols"" : [ ""P"", ""C"", ""B1"", ""B2"", ""B3"", ""SS"", ""LF"", ""CF"", ""RF"", ""DH"" ] + }, { + ""type"" : ""record"", + ""name"" : ""Player"", + ""fields"" : [ { + ""name"" : ""number"", + ""type"" : ""int"" + }, { + ""name"" : ""first_name"", + ""type"" : ""string"" + }, { + ""name"" : ""last_name"", + ""type"" : ""string"" + }, { + ""name"" : ""position"", + ""type"" : { + ""type"" : ""array"", + ""items"" : ""Position"" + } + } ] + } ], + ""messages"" : { + } +} +"; + private const string _comments = @" +{ + ""protocol"" : ""Comments"", + ""namespace"" : ""testing"", + ""types"" : [ { + ""type"" : ""enum"", + ""name"" : ""DocumentedEnum"", + ""doc"" : ""Documented Enum"", + ""symbols"" : [ ""A"", ""B"", ""C"" ], + ""default"" : ""A"" + }, { + ""type"" : ""enum"", + ""name"" : ""UndocumentedEnum"", + ""symbols"" : [ ""D"", ""E"" ] + }, { + ""type"" : ""fixed"", + ""name"" : ""DocumentedFixed"", + ""doc"" : ""Documented Fixed Type"", + ""size"" : 16 + }, { + ""type"" : ""fixed"", + ""name"" : ""UndocumentedFixed"", + ""size"" : 16 + }, { + ""type"" : ""error"", + ""name"" : ""DocumentedError"", + ""doc"" : ""Documented Error"", + ""fields"" : [ { + ""name"" : ""reason"", + ""type"" : ""string"", + ""doc"" : ""Documented Reason Field"" + }, { + ""name"" : ""explanation"", + ""type"" : ""string"", + ""doc"" : ""Default Doc Explanation Field"" + } ] + }, { + ""type"" : ""record"", + ""name"" : ""UndocumentedRecord"", + ""fields"" : [ { + ""name"" : ""description"", + ""type"" : ""string"" + } ] + } ], + ""messages"" : { + ""documentedMethod"" : { + ""doc"" : ""Documented Method"", + ""request"" : [ { + ""name"" : ""message"", + ""type"" : ""string"", + ""doc"" : ""Documented Parameter"" + }, { + ""name"" : ""defMsg"", + ""type"" : ""string"", + ""doc"" : ""Default Documented Parameter"" + } ], + ""response"" : ""null"", + ""errors"" : [ ""DocumentedError"" ] + }, + ""undocumentedMethod"" : { + ""request"" : [ { + ""name"" : ""message"", + ""type"" : ""string"" + } ], + ""response"" : ""null"" + } + } +} +"; + + private const string _interop = @" +{ + ""protocol"" : ""InteropProtocol"", + ""namespace"" : ""org.apache.avro.interop"", + ""doc"" : ""Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\""License\""); you may not use this file except in compliance\nwith the License. You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \""AS IS\"" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License."", + ""types"" : [ { + ""type"" : ""record"", + ""name"" : ""Label"", + ""fields"" : [ { + ""name"" : ""label"", + ""type"" : ""string"" + } ] + }, { + ""type"" : ""enum"", + ""name"" : ""Kind"", + ""symbols"" : [ ""A"", ""B"", ""C"" ] + }, { + ""type"" : ""fixed"", + ""name"" : ""MD5"", + ""size"" : 16 + }, { + ""type"" : ""record"", + ""name"" : ""Node"", + ""fields"" : [ { + ""name"" : ""label"", + ""type"" : ""string"" + }, { + ""name"" : ""children"", + ""type"" : { + ""type"" : ""array"", + ""items"" : ""Node"" + }, + ""default"" : [ ] + } ] + }, { + ""type"" : ""record"", + ""name"" : ""Interop"", + ""fields"" : [ { + ""name"" : ""intField"", + ""type"" : ""int"", + ""default"" : 1 + }, { + ""name"" : ""longField"", + ""type"" : ""long"", + ""default"" : -1 + }, { + ""name"" : ""stringField"", + ""type"" : ""string"" + }, { + ""name"" : ""boolField"", + ""type"" : ""boolean"", + ""default"" : false + }, { + ""name"" : ""floatField"", + ""type"" : ""float"", + ""default"" : 0.0 + }, { + ""name"" : ""doubleField"", + ""type"" : ""double"", + ""default"" : -1.0E12 + }, { + ""name"" : ""nullField"", + ""type"" : ""null"" + }, { + ""name"" : ""arrayField"", + ""type"" : { + ""type"" : ""array"", + ""items"" : ""double"" + }, + ""default"" : [ ] + }, { + ""name"" : ""mapField"", + ""type"" : { + ""type"" : ""map"", + ""values"" : ""Label"" + } + }, { + ""name"" : ""unionField"", + ""type"" : [ ""boolean"", ""double"", { + ""type"" : ""array"", + ""items"" : ""bytes"" + } ] + }, { + ""name"" : ""enumField"", + ""type"" : ""Kind"" + }, { + ""name"" : ""fixedField"", + ""type"" : ""MD5"" + }, { + ""name"" : ""recordField"", + ""type"" : ""Node"" + } ] + } ], + ""messages"" : { } +} +"; + private const string _namespaces = @" +{ + ""protocol"" : ""TestNamespace"", + ""namespace"" : ""avro.test.protocol"", + ""doc"" : ""Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\""License\""); you may not use this file except in compliance\nwith the License. You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \""AS IS\"" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License."", + ""types"" : [ { + ""type"" : ""fixed"", + ""name"" : ""FixedInOtherNamespace"", + ""namespace"" : ""avro.test.fixed"", + ""size"" : 16 + }, { + ""type"" : ""fixed"", + ""name"" : ""FixedInThisNamespace"", + ""size"" : 16 + }, { + ""type"" : ""record"", + ""name"" : ""RecordInOtherNamespace"", + ""namespace"" : ""avro.test.record"", + ""fields"" : [ ] + }, { + ""type"" : ""error"", + ""name"" : ""ErrorInOtherNamespace"", + ""namespace"" : ""avro.test.error"", + ""fields"" : [ ] + }, { + ""type"" : ""enum"", + ""name"" : ""EnumInOtherNamespace"", + ""namespace"" : ""avro.test.enum"", + ""symbols"" : [ ""FOO"" ] + }, { + ""type"" : ""record"", + ""name"" : ""RefersToOthers"", + ""fields"" : [ { + ""name"" : ""someFixed"", + ""type"" : ""avro.test.fixed.FixedInOtherNamespace"" + }, { + ""name"" : ""someRecord"", + ""type"" : ""avro.test.record.RecordInOtherNamespace"" + }, { + ""name"" : ""someError"", + ""type"" : ""avro.test.error.ErrorInOtherNamespace"" + }, { + ""name"" : ""someEnum"", + ""type"" : ""avro.test.enum.EnumInOtherNamespace"" + }, { + ""name"" : ""thisFixed"", + ""type"" : ""FixedInThisNamespace"" + } ] + } ], + ""messages"" : { + } +} +"; + private const string _forwardRef = @" +{ + ""protocol"": ""Import"", + ""namespace"": ""org.foo"", + ""types"": [ + { + ""type"": ""record"", + ""name"": ""ANameValue"", + ""fields"": [ + { ""name"":""name"", ""type"": ""string"", ""doc"":""the name"" }, + { ""name"": ""value"", ""type"": ""string"", ""doc"": ""the value"" }, + { ""name"": ""type"", ""type"": { ""type"": ""enum"", ""name"":""ValueType"", ""symbols"": [""JSON"",""BASE64BIN"",""PLAIN""] }, ""default"": ""PLAIN"" } + ] + } + ], + ""messages"": { } +} +"; + private const string _unicode = @" +{ + ""protocol"" : ""ĐŸŅ€ĐžŅ‚ĐžĐēĐžĐģŅ‹"", + ""namespace"" : ""org.avro.test"", + ""doc"" : ""This is a test that UTF8 functions correctly.\nこぎテ゚トでは、UTF - 8ã§æ­Ŗã—ãæŠŸčƒŊしãĻいる。\nčŋ™æ˜¯ä¸€ä¸Ēæĩ‹č¯•īŧŒUTF - 8įš„æ­Ŗå¸¸čŋčĄŒã€‚"", + ""types"" : [ { + ""type"" : ""record"", + ""name"" : ""ĐĄŅ‚Ņ€ŅƒĐēŅ‚ŅƒŅ€Đ°"", + ""fields"" : [ { + ""name"" : ""ĐĄŅ‚Ņ€ĐžĐēĐžĐ˛Ņ‹Đš"", + ""type"" : ""string"" + }, { + ""name"" : ""文字列"", + ""type"" : ""string"" + } ] + } ], + ""messages"" : { + } +} +"; + + private const string _myProtocol = @" +{ + ""protocol"" : ""MyProtocol"", + ""namespace"" : ""com.foo"", + ""types"" : [ + { + ""type"" : ""record"", + ""name"" : ""A"", + ""fields"" : [ { ""name"" : ""f1"", ""type"" : ""long"" } ] + }, + { + ""type"" : ""enum"", + ""name"" : ""MyEnum"", + ""symbols"" : [ ""A"", ""B"", ""C"" ] + }, + { + ""type"": ""fixed"", + ""size"": 16, + ""name"": ""MyFixed"" + }, + { + ""type"" : ""record"", + ""name"" : ""Z"", + ""fields"" : + [ + { ""name"" : ""myUInt"", ""type"" : [ ""int"", ""null"" ] }, + { ""name"" : ""myULong"", ""type"" : [ ""long"", ""null"" ] }, + { ""name"" : ""myUBool"", ""type"" : [ ""boolean"", ""null"" ] }, + { ""name"" : ""myUDouble"", ""type"" : [ ""double"", ""null"" ] }, + { ""name"" : ""myUFloat"", ""type"" : [ ""float"", ""null"" ] }, + { ""name"" : ""myUBytes"", ""type"" : [ ""bytes"", ""null"" ] }, + { ""name"" : ""myUString"", ""type"" : [ ""string"", ""null"" ] }, + + { ""name"" : ""myInt"", ""type"" : ""int"" }, + { ""name"" : ""myLong"", ""type"" : ""long"" }, + { ""name"" : ""myBool"", ""type"" : ""boolean"" }, + { ""name"" : ""myDouble"", ""type"" : ""double"" }, + { ""name"" : ""myFloat"", ""type"" : ""float"" }, + { ""name"" : ""myBytes"", ""type"" : ""bytes"" }, + { ""name"" : ""myString"", ""type"" : ""string"" }, + { ""name"" : ""myNull"", ""type"" : ""null"" }, + + { ""name"" : ""myFixed"", ""type"" : ""MyFixed"" }, + { ""name"" : ""myA"", ""type"" : ""A"" }, + { ""name"" : ""myE"", ""type"" : ""MyEnum"" }, + { ""name"" : ""myArray"", ""type"" : { ""type"" : ""array"", ""items"" : ""bytes"" } }, + { ""name"" : ""myArray2"", ""type"" : { ""type"" : ""array"", ""items"" : { ""type"" : ""record"", ""name"" : ""newRec"", ""fields"" : [ { ""name"" : ""f1"", ""type"" : ""long""} ] } } }, + { ""name"" : ""myMap"", ""type"" : { ""type"" : ""map"", ""values"" : ""string"" } }, + { ""name"" : ""myMap2"", ""type"" : { ""type"" : ""map"", ""values"" : ""newRec"" } }, + { ""name"" : ""myObject"", ""type"" : [ ""MyEnum"", ""A"", ""null"" ] }, + { ""name"" : ""myArray3"", ""type"" : { ""type"" : ""array"", ""items"" : { ""type"" : ""array"", ""items"" : [ ""double"", ""string"", ""null"" ] } } } + ] + } + ] +}"; + + [TestCase( + _baseball, + new string[] + { + "avro.examples.baseball.Baseball", + "avro.examples.baseball.BaseballCallback", + "avro.examples.baseball.Player", + "avro.examples.baseball.Position" + }, + new string[] + { + "avro/examples/baseball/Baseball.cs", + "avro/examples/baseball/BaseballCallback.cs", + "avro/examples/baseball/Player.cs", + "avro/examples/baseball/Position.cs" + })] + [TestCase( + _comments, + new string[] + { + "testing.Comments", + "testing.CommentsCallback", + "testing.DocumentedEnum", + "testing.DocumentedError", + "testing.DocumentedFixed", + "testing.UndocumentedEnum", + "testing.UndocumentedFixed", + "testing.UndocumentedRecord" + }, + new string[] + { + "testing/Comments.cs", + "testing/CommentsCallback.cs", + "testing/DocumentedEnum.cs", + "testing/DocumentedError.cs", + "testing/DocumentedFixed.cs", + "testing/UndocumentedEnum.cs", + "testing/UndocumentedFixed.cs", + "testing/UndocumentedRecord.cs" + })] + [TestCase( + _interop, + new string[] + { + "org.apache.avro.interop.Label", + "org.apache.avro.interop.Interop", + "org.apache.avro.interop.InteropProtocol", + "org.apache.avro.interop.InteropProtocolCallback", + "org.apache.avro.interop.Kind", + "org.apache.avro.interop.MD5", + "org.apache.avro.interop.Node", + }, + new string[] + { + "org/apache/avro/interop/Label.cs", + "org/apache/avro/interop/Interop.cs", + "org/apache/avro/interop/InteropProtocol.cs", + "org/apache/avro/interop/InteropProtocolCallback.cs", + "org/apache/avro/interop/Kind.cs", + "org/apache/avro/interop/MD5.cs", + "org/apache/avro/interop/Node.cs", + })] + [TestCase( + _namespaces, + new string[] + { + "avro.test.enum.EnumInOtherNamespace", + "avro.test.error.ErrorInOtherNamespace", + "avro.test.fixed.FixedInOtherNamespace", + "avro.test.protocol.FixedInThisNamespace", + "avro.test.protocol.RefersToOthers", + "avro.test.protocol.TestNamespace", + "avro.test.protocol.TestNamespaceCallback", + "avro.test.record.RecordInOtherNamespace" + }, + new string[] + { + "avro/test/enum/EnumInOtherNamespace.cs", + "avro/test/error/ErrorInOtherNamespace.cs", + "avro/test/fixed/FixedInOtherNamespace.cs", + "avro/test/protocol/FixedInThisNamespace.cs", + "avro/test/protocol/RefersToOthers.cs", + "avro/test/protocol/TestNamespace.cs", + "avro/test/protocol/TestNamespaceCallback.cs", + "avro/test/record/RecordInOtherNamespace.cs" + })] + [TestCase( + _forwardRef, + new string[] + { + "org.foo.ANameValue", + "org.foo.Import", + "org.foo.ImportCallback", + "org.foo.ValueType" + }, + new string[] + { + "org/foo/ANameValue.cs", + "org/foo/Import.cs", + "org/foo/ImportCallback.cs", + "org/foo/ValueType.cs" + })] + [TestCase( + _unicode, + new string[] + { + "org.avro.test.ĐŸŅ€ĐžŅ‚ĐžĐēĐžĐģŅ‹", + "org.avro.test.ĐŸŅ€ĐžŅ‚ĐžĐēĐžĐģŅ‹Callback", + "org.avro.test.ĐĄŅ‚Ņ€ŅƒĐēŅ‚ŅƒŅ€Đ°" + }, + new string[] + { + "org/avro/test/ĐŸŅ€ĐžŅ‚ĐžĐēĐžĐģŅ‹.cs", + "org/avro/test/ĐŸŅ€ĐžŅ‚ĐžĐēĐžĐģŅ‹Callback.cs", + "org/avro/test/ĐĄŅ‚Ņ€ŅƒĐēŅ‚ŅƒŅ€Đ°.cs" + })] + [TestCase( + _myProtocol, + new string[] + { + "com.foo.A", + "com.foo.MyEnum", + "com.foo.MyFixed", + "com.foo.MyProtocol", + "com.foo.MyProtocolCallback", + "com.foo.newRec", + "com.foo.Z" + }, + new string[] + { + "com/foo/A.cs", + "com/foo/MyEnum.cs", + "com/foo/MyFixed.cs", + "com/foo/MyProtocol.cs", + "com/foo/MyProtocolCallback.cs", + "com/foo/newRec.cs", + "com/foo/Z.cs" + })] + public void GenerateProtocol(string protocol, IEnumerable typeNamesToCheck, IEnumerable generatedFilesToCheck) + { + AvroGenHelper.TestProtocol(protocol, typeNamesToCheck, generatedFilesToCheck: generatedFilesToCheck); + } + } +} diff --git a/lang/csharp/src/apache/test/AvroGen/AvroGenSchemaTests.cs b/lang/csharp/src/apache/test/AvroGen/AvroGenSchemaTests.cs new file mode 100644 index 00000000000..2763318c875 --- /dev/null +++ b/lang/csharp/src/apache/test/AvroGen/AvroGenSchemaTests.cs @@ -0,0 +1,819 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Collections.Generic; +using Microsoft.CodeAnalysis; +using NUnit.Framework; +using Avro.Specific; + +namespace Avro.Test.AvroGen +{ + [TestFixture] + + class AvroGenSchemaTests + { + private const string _customConversionWithLogicalTypes = @" +{ + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""CustomConversionWithLogicalTypes"", + ""doc"" : ""Test custom conversion and logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""customEnum"", + ""type"": [""null"", { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""name"": ""CustomAvroEnum"", + ""type"": ""enum"", + ""logicalType"": ""custom-enum"", + ""symbols"": [""ONE"", ""TWO"", ""THREE""] + }] + }] +} +"; + + private const string _logicalTypesWithCustomConversion = @" +{ +""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""LogicalTypesWithCustomConversion"", + ""doc"" : ""Test unions with logical types in generated Java classes"", + ""fields"": [ + {""name"": ""nullableCustomField"", ""type"": [""null"", {""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 9, ""scale"": 2}], ""default"": null}, + { ""name"": ""nonNullCustomField"", ""type"": { ""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 9, ""scale"": 2} }, + { ""name"": ""nullableFixedSizeString"", ""type"": [""null"", { ""type"": ""bytes"", ""logicalType"": ""fixed-size-string"", ""minLength"": 1, ""maxLength"": 50}], ""default"": null}, + { ""name"": ""nonNullFixedSizeString"", ""type"": { ""type"": ""bytes"", ""logicalType"": ""fixed-size-string"", ""minLength"": 1, ""maxLength"": 50} } + ] +} +"; + + private const string _logicalTypesWithDefaults = @" +{ +""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""LogicalTypesWithDefaults"", + ""doc"" : ""Test logical types and default values in generated Java classes"", + ""fields"": [ + {""name"": ""nullableDate"", ""type"": [{""type"": ""int"", ""logicalType"": ""date""}, ""null""], ""default"": 1234}, + { ""name"": ""nonNullDate"", ""type"": { ""type"": ""int"", ""logicalType"": ""date""}, ""default"": 1234} + ] +}"; + + private const string _nestedLogicalTypesArray = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedLogicalTypesArray"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""arrayOfRecords"", + ""type"": { + ""type"": ""array"", + ""items"": { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""name"": ""RecordInArray"", + ""type"": ""record"", + ""fields"": [ + { + ""name"": ""nullableDateField"", + ""type"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}] + } + ] + } + } + }] +} +"; + + private const string _nestedLogicalTypesMap = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedLogicalTypesMap"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""mapOfRecords"", + ""type"": { + ""type"": ""map"", + ""values"": { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""name"": ""RecordInMap"", + ""type"": ""record"", + ""fields"": [ + { + ""name"": ""nullableDateField"", + ""type"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}] + } + ] + } + } + }] +}"; + + private const string _nestedLogicalTypesRecord = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedLogicalTypesRecord"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""nestedRecord"", + ""type"": { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedRecord"", + ""fields"": [ + { + ""name"": ""nullableDateField"", + ""type"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}] + } + ] + } + }] +}"; + + private const string _nestedLogicalTypesUnionFixedDecimal = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedLogicalTypesUnionFixedDecimal"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""unionOfFixedDecimal"", + ""type"": [""null"", { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""name"": ""FixedInUnion"", + ""type"": { + ""type"": ""fixed"", + ""size"": 12, + ""name"": ""FixedName"", + }, + ""logicalType"": ""decimal"", + ""precision"": 28, + ""scale"": 15 + }] + }] +}"; + + private const string _nestedLogicalTypesUnion = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedLogicalTypesUnion"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""unionOfRecords"", + ""type"": [""null"", { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""name"": ""RecordInUnion"", + ""type"": ""record"", + ""fields"": [ + { + ""name"": ""nullableDateField"", + ""type"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}] + } + ] + }] + }] +}"; + + private const string _nestedSomeNamespaceRecord = @" +{""namespace"": ""org.apache.avro.codegentest.some"", + ""type"": ""record"", + ""name"": ""NestedSomeNamespaceRecord"", + ""doc"" : ""Test nested types with different namespace than the outer type"", + ""fields"": [ + { + ""name"": ""nestedRecord"", + ""type"": { + ""namespace"": ""org.apache.avro.codegentest.other"", + ""type"": ""record"", + ""name"": ""NestedOtherNamespaceRecord"", + ""fields"": [ + { + ""name"": ""someField"", + ""type"": ""int"" + } + ] + } + }] +}"; + + private const string _nullableLogicalTypesArray = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NullableLogicalTypesArray"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""arrayOfLogicalType"", + ""type"": { + ""type"": ""array"", + ""items"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}] + } + }] +}"; + + private const string _nullableLogicalTypes = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NullableLogicalTypes"", + ""doc"" : ""Test unions with logical types in generated Java classes"", + ""fields"": [ + {""name"": ""nullableDate"", ""type"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}], ""default"": null} + ] +}"; + + private const string _stringLogicalType = @" +{ + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""StringLogicalType"", + ""doc"": ""Test logical type applied to field of type string"", + ""fields"": [ + { + ""name"": ""someIdentifier"", + ""type"": { + ""type"": ""string"", + ""logicalType"": ""uuid"" + } +}, + { + ""name"": ""someJavaString"", + ""type"": ""string"", + ""doc"": ""Just to ensure no one removed String because this is the basis of this test"" + } + ] +}"; + + // https://issues.apache.org/jira/browse/AVRO-2883 + private const string _schema_avro_2883 = @" +{ + ""type"" : ""record"", + ""name"" : ""TestModel"", + ""namespace"" : ""my.avro.ns"", + ""fields"" : [ { + ""name"" : ""eventType"", + ""type"" : { + ""type"" : ""enum"", + ""name"" : ""EventType"", + ""symbols"" : [ ""CREATE"", ""UPDATE"", ""DELETE"" ] + } +} ] +}"; + + // https://issues.apache.org/jira/browse/AVRO-3046 + private const string _schema_avro_3046 = @" +{ + ""type"": ""record"", + ""name"": ""ExampleRecord"", + ""namespace"": ""com.example"", + ""fields"": [ + { + ""name"": ""Id"", + ""type"": ""string"", + ""logicalType"": ""UUID"" + }, + { + ""name"": ""InnerRecord"", + ""type"": { + ""type"": ""record"", + ""name"": ""InnerRecord"", + ""fields"": [ + { + ""name"": ""Id"", + ""type"": ""string"", + ""logicalType"": ""UUID"" + } + ] + } + } + ] +}"; + + private Assembly TestSchema( + string schema, + IEnumerable typeNamesToCheck = null, + IEnumerable> namespaceMapping = null, + IEnumerable generatedFilesToCheck = null) + { + // Create temp folder + string outputDir = AvroGenHelper.CreateEmptyTemporaryFolder(out string uniqueId); + + try + { + // Save schema + string schemaFileName = Path.Combine(outputDir, $"{uniqueId}.avsc"); + System.IO.File.WriteAllText(schemaFileName, schema); + + // Generate from schema file + Assert.That(AvroGenTool.GenSchema(schemaFileName, outputDir, namespaceMapping ?? new Dictionary(), false), Is.EqualTo(0)); + + // Check if all generated files exist + if (generatedFilesToCheck != null) + { + foreach (string generatedFile in generatedFilesToCheck) + { + Assert.That(new FileInfo(Path.Combine(outputDir, generatedFile)), Does.Exist); + } + } + + // Compile into netstandard library and load assembly + Assembly assembly = AvroGenHelper.CompileCSharpFilesIntoLibrary( + new DirectoryInfo(outputDir) + .EnumerateFiles("*.cs", SearchOption.AllDirectories) + .Select(fi => fi.FullName), + uniqueId); + + if (typeNamesToCheck != null) + { + // Check if the compiled code has the same number of types defined as the check list + Assert.That(typeNamesToCheck.Count(), Is.EqualTo(assembly.DefinedTypes.Count())); + + // Check if types available in compiled assembly + foreach (string typeName in typeNamesToCheck) + { + Type type = assembly.GetType(typeName); + Assert.That(type, Is.Not.Null); + + // Instantiate + object obj = Activator.CreateInstance(type); + Assert.That(obj, Is.Not.Null); + } + } + + // Verify GeneratedCodeAttribute + foreach(System.Reflection.TypeInfo definedType in assembly.DefinedTypes) + { + var generatedAttributes = definedType.CustomAttributes.Where(x => x.AttributeType.FullName == "System.CodeDom.Compiler.GeneratedCodeAttribute"); + Assert.That(generatedAttributes, Is.Not.Null); + } + + return assembly; + } + finally + { + Directory.Delete(outputDir, true); + } + } + + [TestCase( + _logicalTypesWithDefaults, + new string[] + { + "org.apache.avro.codegentest.testdata.LogicalTypesWithDefaults" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/LogicalTypesWithDefaults.cs" + })] + [TestCase( + _nestedLogicalTypesArray, + new string[] + { + "org.apache.avro.codegentest.testdata.NestedLogicalTypesArray", + "org.apache.avro.codegentest.testdata.RecordInArray" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NestedLogicalTypesArray.cs", + "org/apache/avro/codegentest/testdata/RecordInArray.cs" + })] + [TestCase( + _nestedLogicalTypesMap, + new string[] + { + "org.apache.avro.codegentest.testdata.NestedLogicalTypesMap", + "org.apache.avro.codegentest.testdata.RecordInMap" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NestedLogicalTypesMap.cs", + "org/apache/avro/codegentest/testdata/RecordInMap.cs" + })] + [TestCase( + _nestedLogicalTypesRecord, + new string[] + { + "org.apache.avro.codegentest.testdata.NestedLogicalTypesRecord", + "org.apache.avro.codegentest.testdata.NestedRecord" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NestedLogicalTypesRecord.cs", + "org/apache/avro/codegentest/testdata/NestedRecord.cs" + })] + [TestCase( + _nestedLogicalTypesUnion, + new string[] + { + "org.apache.avro.codegentest.testdata.NestedLogicalTypesUnion", + "org.apache.avro.codegentest.testdata.RecordInUnion" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NestedLogicalTypesUnion.cs", + "org/apache/avro/codegentest/testdata/RecordInUnion.cs" + })] + [TestCase( + _nestedSomeNamespaceRecord, + new string[] + { + "org.apache.avro.codegentest.some.NestedSomeNamespaceRecord", + "org.apache.avro.codegentest.other.NestedOtherNamespaceRecord" + }, + new string[] + { + "org/apache/avro/codegentest/some/NestedSomeNamespaceRecord.cs", + "org/apache/avro/codegentest/other/NestedOtherNamespaceRecord.cs" + })] + [TestCase( + _nestedLogicalTypesUnionFixedDecimal, + new string[] + { + "org.apache.avro.codegentest.testdata.NestedLogicalTypesUnionFixedDecimal" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NestedLogicalTypesUnionFixedDecimal.cs" + })] + [TestCase( + _nullableLogicalTypes, + new string[] + { + "org.apache.avro.codegentest.testdata.NullableLogicalTypes" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NullableLogicalTypes.cs" + })] + [TestCase( + _nullableLogicalTypesArray, + new string[] + { + "org.apache.avro.codegentest.testdata.NullableLogicalTypesArray" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NullableLogicalTypesArray.cs" + })] + [TestCase( + _schema_avro_2883, + new string[] + { + "my.avro.ns.TestModel", + "my.avro.ns.EventType", + }, + new string[] + { + "my/avro/ns/TestModel.cs", + "my/avro/ns/EventType.cs" + })] + public void GenerateSchema(string schema, IEnumerable typeNamesToCheck, IEnumerable generatedFilesToCheck) + { + AvroGenHelper.TestSchema(schema, typeNamesToCheck, generatedFilesToCheck: generatedFilesToCheck); + } + + [TestCase( + _nullableLogicalTypesArray, + "org.apache.avro.codegentest.testdata", "org.apache.csharp.codegentest.testdata", + new string[] + { + "org.apache.csharp.codegentest.testdata.NullableLogicalTypesArray" + }, + new string[] + { + "org/apache/csharp/codegentest/testdata/NullableLogicalTypesArray.cs" + })] + [TestCase( + _nestedLogicalTypesUnion, + "org.apache.avro.codegentest.testdata", "org.apache.csharp.codegentest.testdata", + new string[] + { + "org.apache.csharp.codegentest.testdata.NestedLogicalTypesUnion", + "org.apache.csharp.codegentest.testdata.RecordInUnion" + }, + new string[] + { + "org/apache/csharp/codegentest/testdata/NestedLogicalTypesUnion.cs", + "org/apache/csharp/codegentest/testdata/RecordInUnion.cs" + })] + [TestCase( + _schema_avro_2883, + "my.avro.ns", "my.csharp.ns", + new string[] + { + "my.csharp.ns.TestModel", + "my.csharp.ns.EventType", + }, + new string[] + { + "my/csharp/ns/TestModel.cs", + "my/csharp/ns/EventType.cs" + })] + [TestCase( + _schema_avro_3046, + "com.example", "Example", + new string[] + { + "Example.ExampleRecord", + "Example.InnerRecord", + }, + new string[] + { + "Example/ExampleRecord.cs", + "Example/InnerRecord.cs" + })] + [TestCase( + _nullableLogicalTypesArray, + "org.apache.avro.codegentest.testdata", "org.apache.@return.@int", // Reserved keywords in namespace + new string[] + { + "org.apache.return.int.NullableLogicalTypesArray" + }, + new string[] + { + "org/apache/return/int/NullableLogicalTypesArray.cs" + })] + [TestCase( + _nullableLogicalTypesArray, + "org.apache.avro.codegentest.testdata", "org.apache.value.partial", // Contextual keywords in namespace + new string[] + { + "org.apache.value.partial.NullableLogicalTypesArray" + }, + new string[] + { + "org/apache/value/partial/NullableLogicalTypesArray.cs" + })] + [TestCase(@" +{ + ""type"": ""fixed"", + ""namespace"": ""com.base"", + ""name"": ""MD5"", + ""size"": 16 +}", + "com.base", "SchemaTest", + new string[] + { + "SchemaTest.MD5" + }, + new string[] + { + "SchemaTest/MD5.cs" + })] + [TestCase(@" +{ + ""type"": ""fixed"", + ""namespace"": ""com.base"", + ""name"": ""MD5"", + ""size"": 16 +}", + "miss", "SchemaTest", + new string[] + { + "com.base.MD5" + }, + new string[] + { + "com/base/MD5.cs" + })] + public void GenerateSchemaWithNamespaceMapping( + string schema, + string namespaceMappingFrom, + string namespaceMappingTo, + IEnumerable typeNamesToCheck, + IEnumerable generatedFilesToCheck) + { + AvroGenHelper.TestSchema(schema, typeNamesToCheck, new Dictionary { { namespaceMappingFrom, namespaceMappingTo } }, generatedFilesToCheck); + } + + [TestCase(_logicalTypesWithCustomConversion)] + [TestCase(_customConversionWithLogicalTypes)] + public void UnknownLogicalTypesFallbackToBaseType(string schema) + { + // Create temp folder + string outputDir = AvroGenHelper.CreateEmptyTemporaryFolder(out string uniqueId); + + try + { + // Save schema + string schemaFileName = Path.Combine(outputDir, $"{uniqueId}.avsc"); + System.IO.File.WriteAllText(schemaFileName, schema); + + // Assert that the generator successfully runs (exit code 0) + // by ignoring the unknown logical types and using the underlying base types + Assert.That(AvroGenTool.GenSchema(schemaFileName, outputDir, new Dictionary(), false), Is.EqualTo(0)); + } + finally + { + Directory.Delete(outputDir, true); + } + } + + [TestCase(@" +{ + ""type"" : ""record"", + ""name"" : ""ClassKeywords"", + ""namespace"" : ""com.base"", + ""fields"" : + [ + { ""name"" : ""int"", ""type"" : ""int"" }, + { ""name"" : ""base"", ""type"" : ""long"" }, + { ""name"" : ""event"", ""type"" : ""boolean"" }, + { ""name"" : ""foreach"", ""type"" : ""double"" }, + { ""name"" : ""bool"", ""type"" : ""float"" }, + { ""name"" : ""internal"", ""type"" : ""bytes"" }, + { ""name"" : ""while"", ""type"" : ""string"" }, + { ""name"" : ""return"", ""type"" : ""null"" }, + { ""name"" : ""enum"", ""type"" : { ""type"" : ""enum"", ""name"" : ""class"", ""symbols"" : [ ""Unknown"", ""A"", ""B"" ], ""default"" : ""Unknown"" } }, + { ""name"" : ""string"", ""type"" : { ""type"": ""fixed"", ""size"": 16, ""name"": ""static"" } } + ] +}", + new object[] { "com.base.ClassKeywords", typeof(int), typeof(long), typeof(bool), typeof(double), typeof(float), typeof(byte[]), typeof(string), typeof(object), "com.base.class", "com.base.static" })] + [TestCase(@" +{ + ""type"" : ""record"", + ""name"" : ""AvroNamespaceType"", + ""namespace"" : ""My.Avro"", + ""fields"" : + [ + { ""name"" : ""justenum"", ""type"" : { ""type"" : ""enum"", ""name"" : ""justenumEnum"", ""symbols"" : [ ""One"", ""Two"" ] } }, + ] +}", + new object[] { "My.Avro.AvroNamespaceType", "My.Avro.justenumEnum" })] + [TestCase(@" +{ + ""type"" : ""record"", + ""name"" : ""SchemaObject"", + ""namespace"" : ""schematest"", + ""fields"" : + [ + { ""name"" : ""myobject"", ""type"" : + [ + ""null"", + { ""type"" : ""array"", ""items"" : + [ + ""null"", + { ""type"" : ""enum"", ""name"" : ""MyEnum"", ""symbols"" : [ ""A"", ""B"" ] }, + { ""type"": ""fixed"", ""size"": 16, ""name"": ""MyFixed"" } + ] + } + ] + } + ] +}", + new object[] { "schematest.SchemaObject", typeof(IList) })] + [TestCase(@" +{ + ""type"" : ""record"", + ""name"" : ""LogicalTypes"", + ""namespace"" : ""schematest"", + ""fields"" : + [ + { ""name"" : ""nullibleguid"", ""type"" : [""null"", {""type"": ""string"", ""logicalType"": ""uuid"" } ]}, + { ""name"" : ""guid"", ""type"" : {""type"": ""string"", ""logicalType"": ""uuid"" } }, + { ""name"" : ""nullibletimestampmillis"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""timestamp-millis""}] }, + { ""name"" : ""timestampmillis"", ""type"" : {""type"": ""long"", ""logicalType"": ""timestamp-millis""} }, + { ""name"" : ""nullibiletimestampmicros"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""timestamp-micros""}] }, + { ""name"" : ""timestampmicros"", ""type"" : {""type"": ""long"", ""logicalType"": ""timestamp-micros""} }, + { ""name"" : ""nulliblelocaltimestampmillis"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""local-timestamp-millis""}] }, + { ""name"" : ""localtimestampmillis"", ""type"" : {""type"": ""long"", ""logicalType"": ""local-timestamp-millis""} }, + { ""name"" : ""nullibilelocaltimestampmicros"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""local-timestamp-micros""}] }, + { ""name"" : ""locallocaltimestampmicros"", ""type"" : {""type"": ""long"", ""logicalType"": ""local-timestamp-micros""} }, + { ""name"" : ""nullibiletimemicros"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""time-micros""}] }, + { ""name"" : ""timemicros"", ""type"" : {""type"": ""long"", ""logicalType"": ""time-micros""} }, + { ""name"" : ""nullibiletimemillis"", ""type"" : [""null"", {""type"": ""int"", ""logicalType"": ""time-millis""}] }, + { ""name"" : ""timemillis"", ""type"" : {""type"": ""int"", ""logicalType"": ""time-millis""} }, + { ""name"" : ""nullibledecimal"", ""type"" : [""null"", {""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2}] }, + { ""name"" : ""decimal"", ""type"" : {""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2} }, + { ""name"" : ""nullibledecimalfixed"", ""type"" : [""null"", {""type"": {""type"" : ""fixed"", ""size"": 16, ""name"": ""ndf""}, ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2}] }, + { ""name"" : ""decimalfixed"", ""type"" : {""type"": {""type"" : ""fixed"", ""size"": 16, ""name"": ""df""}, ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2} } + ] +}", + new object[] { "schematest.LogicalTypes", typeof(Guid?), typeof(Guid), typeof(DateTime?), typeof(DateTime), typeof(DateTime?), typeof(DateTime), typeof(DateTime?), typeof(DateTime), typeof(DateTime?), typeof(DateTime), typeof(TimeSpan?), typeof(TimeSpan), typeof(TimeSpan?), typeof(TimeSpan), typeof(AvroDecimal?), typeof(AvroDecimal), typeof(AvroDecimal?), typeof(AvroDecimal) })] + [TestCase(@" +{ + ""namespace"": ""enum.base"", + ""type"": ""record"", + ""name"": ""EnumInDifferentNamespace"", + ""doc"": ""Test enum with a default value in a different namespace"", + ""fields"": [ + { + ""name"": ""anEnum"", + ""type"": { + ""namespace"": ""enum.base.other"", + ""type"": ""enum"", + ""name"": ""AnEnum"", + ""symbols"": [ + ""A"", + ""B"" + ], + ""default"": ""A"" + } + } + ] +}", + new object[] { "enum.base.EnumInDifferentNamespace", "enum.base.other.AnEnum" })] + public void GenerateSchemaCheckFields(string schema, object[] result) + { + Assembly assembly = AvroGenHelper.TestSchema(schema); + + // Instantiate object + Type type = assembly.GetType((string)result[0]); + Assert.That(type, Is.Not.Null); + + ISpecificRecord record = Activator.CreateInstance(type) as ISpecificRecord; + Assert.IsNotNull(record); + + // test type of each fields + for (int i = 1; i < result.Length; ++i) + { + object field = record.Get(i - 1); + Type stype; + if (result[i].GetType() == typeof(string)) + { + Type t = assembly.GetType((string)result[i]); + Assert.That(record, Is.Not.Null); + + object obj = Activator.CreateInstance(t); + Assert.That(obj, Is.Not.Null); + stype = obj.GetType(); + } + else + { + stype = (Type)result[i]; + } + if (!stype.IsValueType) + { + Assert.That(field, Is.Null); // can't test reference type, it will be null + } + else if (stype.IsValueType && field == null) + { + Assert.That(field, Is.Null); // nullable value type, so we can't get the type using GetType + } + else + { + Assert.That(field, Is.Not.Null); + Assert.That(field.GetType(), Is.EqualTo(stype)); + } + } + } + + [TestCase( + _nullableLogicalTypesArray, + new string[] + { + "org.apache.avro.codegentest.testdata.NullableLogicalTypesArray" + }, + new string[] + { + "NullableLogicalTypesArray.cs" + })] + [TestCase( + _nestedSomeNamespaceRecord, + new string[] + { + "org.apache.avro.codegentest.some.NestedSomeNamespaceRecord", + "org.apache.avro.codegentest.other.NestedOtherNamespaceRecord" + }, + new string[] + { + "NestedSomeNamespaceRecord.cs", + "NestedOtherNamespaceRecord.cs" + })] + [TestCase(_schema_avro_2883, + new string[] + { + "my.avro.ns.TestModel", + "my.avro.ns.EventType", + }, + new string[] + { + "TestModel.cs", + "EventType.cs" + })] + public void GenerateSchemaWithSkipDirectoriesOption(string schema, IEnumerable typeNamesToCheck, IEnumerable generatedFilesToCheck) + { + AvroGenHelper.TestSchema(schema, typeNamesToCheck, generatedFilesToCheck: generatedFilesToCheck, skipDirectories: true); + } + } +} diff --git a/lang/csharp/src/apache/test/AvroGen/AvroGenToolTests.cs b/lang/csharp/src/apache/test/AvroGen/AvroGenToolTests.cs new file mode 100644 index 00000000000..698ff468c2d --- /dev/null +++ b/lang/csharp/src/apache/test/AvroGen/AvroGenToolTests.cs @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.Linq; +using System.Reflection; +using NUnit.Framework; + +namespace Avro.Test.AvroGen +{ + [TestFixture] + + class AvroGenToolTests + { + [Test] + public void CommandLineNoArgs() + { + AvroGenToolResult result = AvroGenHelper.RunAvroGenTool(Array.Empty()); + + Assert.That(result.ExitCode, Is.EqualTo(1)); + Assert.That(result.StdOut, Is.Not.Empty); + Assert.That(result.StdErr, Is.Empty); + } + + [TestCase("-h")] + [TestCase("--help")] + [TestCase("--help", "-h")] + [TestCase("--help", "-s", "whatever.avsc", ".")] + [TestCase("-p", "whatever.avpr", ".", "-h")] + public void CommandLineHelp(params string[] args) + { + AvroGenToolResult result = AvroGenHelper.RunAvroGenTool(args); + + Assert.That(result.ExitCode, Is.EqualTo(0)); + Assert.That(result.StdOut, Is.Not.Empty); + Assert.That(result.StdErr, Is.Empty); + } + + [TestCase("--version")] + [TestCase("-V")] + public void CommandLineVersion(params string[] args) + { + AvroGenToolResult result = AvroGenHelper.RunAvroGenTool(args); + + Assert.That(result.ExitCode, Is.EqualTo(0)); + Assert.That(result.StdOut, Is.Not.Empty); + Assert.That(result.StdErr, Is.Empty); + + // Check if returned version is SemVer 2.0 compliant + Assert.That(result.StdOut[0], Does.Match(Utils.VersionTests.SemVerRegex)); + + // Returned version must be the same as the avrogen tool assembly's version + Assert.That(result.StdOut[0], Is.EqualTo(typeof(AvroGenTool).Assembly.GetCustomAttribute().InformationalVersion)); + + // Returned version must be the same as the avro library assembly's version + Assert.That(result.StdOut[0], Is.EqualTo(typeof(Schema).Assembly.GetCustomAttribute().InformationalVersion)); + } + + [TestCase("-p")] + [TestCase("-s")] + [TestCase("-p", "whatever.avpr")] + [TestCase("-p", "whatever.avpr")] + [TestCase("-s", "whatever.avsc")] + [TestCase("whatever.avsc")] + [TestCase("whatever.avsc", ".")] + [TestCase(".")] + [TestCase("-s", "whatever.avsc", "--namespace")] + [TestCase("-s", "whatever.avsc", "--namespace", "org.apache")] + [TestCase("-s", "whatever.avsc", "--namespace", "org.apache:")] + [TestCase("-s", "whatever.avsc", ".", "whatever")] + public void CommandLineInvalidArgs(params string[] args) + { + AvroGenToolResult result = AvroGenHelper.RunAvroGenTool(args); + + Assert.That(result.ExitCode, Is.EqualTo(1)); + Assert.That(result.StdOut, Is.Not.Empty); + Assert.That(result.StdErr, Is.Not.Empty); + } + + [Theory] + public void CommandLineHelpContainsSkipDirectoriesParameter() + { + AvroGenToolResult result = AvroGenHelper.RunAvroGenTool("-h"); + + Assert.That(result.ExitCode, Is.EqualTo(0)); + Assert.IsTrue(result.StdOut.Any(s => s.Contains("--skip-directories"))); + } + } +} diff --git a/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs b/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs index c2889897d64..33c7f0cf6ee 100644 --- a/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs +++ b/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs @@ -17,174 +17,125 @@ */ using System; using System.Collections.Generic; -using System.IO; -using System.CodeDom.Compiler; -using Microsoft.CSharp; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.CodeAnalysis.CSharp; using NUnit.Framework; -using Avro.Specific; -namespace Avro.Test +namespace Avro.Test.CodeGen { [TestFixture] - - class CodeGenTest + class CodeGenTests { -#if !NETCOREAPP // System.CodeDom compilation not supported in .NET Core: https://github.com/dotnet/corefx/issues/12180 - [TestCase(@"{ -""type"" : ""record"", -""name"" : ""ClassKeywords"", -""namespace"" : ""com.base"", -""fields"" : - [ - { ""name"" : ""int"", ""type"" : ""int"" }, - { ""name"" : ""base"", ""type"" : ""long"" }, - { ""name"" : ""event"", ""type"" : ""boolean"" }, - { ""name"" : ""foreach"", ""type"" : ""double"" }, - { ""name"" : ""bool"", ""type"" : ""float"" }, - { ""name"" : ""internal"", ""type"" : ""bytes"" }, - { ""name"" : ""while"", ""type"" : ""string"" }, - { ""name"" : ""return"", ""type"" : ""null"" }, - { ""name"" : ""enum"", ""type"" : { ""type"" : ""enum"", ""name"" : ""class"", ""symbols"" : [ ""Unknown"", ""A"", ""B"" ], ""default"" : ""Unknown"" } }, - { ""name"" : ""string"", ""type"" : { ""type"": ""fixed"", ""size"": 16, ""name"": ""static"" } } - ] -} -", new object[] {"com.base.ClassKeywords", typeof(int), typeof(long), typeof(bool), typeof(double), typeof(float), typeof(byte[]), typeof(string),typeof(object),"com.base.class", "com.base.static"}, TestName = "TestCodeGen0")] - [TestCase(@"{ -""type"" : ""record"", -""name"" : ""SchemaObject"", -""namespace"" : ""schematest"", -""fields"" : - [ - { ""name"" : ""myobject"", ""type"" : - [ - ""null"", - {""type"" : ""array"", ""items"" : [ ""null"", - { ""type"" : ""enum"", ""name"" : ""MyEnum"", ""symbols"" : [ ""A"", ""B"" ] }, - { ""type"": ""fixed"", ""size"": 16, ""name"": ""MyFixed"" } - ] - } - ] - } - ] -} -", new object[] { "schematest.SchemaObject", typeof(IList) }, TestName = "TestCodeGen1")] - [TestCase(@"{ - ""type"" : ""record"", - ""name"" : ""LogicalTypes"", - ""namespace"" : ""schematest"", - ""fields"" : - [ - { ""name"" : ""nullibleguid"", ""type"" : [""null"", {""type"": ""string"", ""logicalType"": ""uuid"" } ]}, - { ""name"" : ""guid"", ""type"" : {""type"": ""string"", ""logicalType"": ""uuid"" } }, - { ""name"" : ""nullibletimestampmillis"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""timestamp-millis""}] }, - { ""name"" : ""timestampmillis"", ""type"" : {""type"": ""long"", ""logicalType"": ""timestamp-millis""} }, - { ""name"" : ""nullibiletimestampmicros"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""timestamp-micros""}] }, - { ""name"" : ""timestampmicros"", ""type"" : {""type"": ""long"", ""logicalType"": ""timestamp-micros""} }, - { ""name"" : ""nullibiletimemicros"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""time-micros""}] }, - { ""name"" : ""timemicros"", ""type"" : {""type"": ""long"", ""logicalType"": ""time-micros""} }, - { ""name"" : ""nullibiletimemillis"", ""type"" : [""null"", {""type"": ""int"", ""logicalType"": ""time-millis""}] }, - { ""name"" : ""timemillis"", ""type"" : {""type"": ""int"", ""logicalType"": ""time-millis""} }, - { ""name"" : ""nullibledecimal"", ""type"" : [""null"", {""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2}] }, - { ""name"" : ""decimal"", ""type"" : {""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2} } - ] -} -", new object[] { "schematest.LogicalTypes", typeof(Guid?), typeof(Guid), typeof(DateTime?), typeof(DateTime), typeof(DateTime?), typeof(DateTime), typeof(TimeSpan?), typeof(TimeSpan), typeof(TimeSpan?), typeof(TimeSpan), typeof(AvroDecimal?), typeof(AvroDecimal) }, TestName = "TestCodeGen2 - Logical Types")] - public static void TestCodeGen(string str, object[] result) - { - Schema schema = Schema.Parse(str); - CompilerResults compres = GenerateSchema(schema); - - // instantiate object - ISpecificRecord rec = compres.CompiledAssembly.CreateInstance((string)result[0]) as ISpecificRecord; - Assert.IsNotNull(rec); - - // test type of each fields - for (int i = 1; i < result.Length; ++i) - { - object field = rec.Get(i - 1); - Type stype; - if (result[i].GetType() == typeof(string)) - { - object obj = compres.CompiledAssembly.CreateInstance((string)result[i]); - Assert.IsNotNull(obj); - stype = obj.GetType(); - } - else - stype = (Type)result[i]; - if (!stype.IsValueType) - Assert.IsNull(field); // can't test reference type, it will be null - else if (stype.IsValueType && field == null) - Assert.IsNull(field); // nullable value type, so we can't get the type using GetType - else - Assert.AreEqual(stype, field.GetType()); - } + [Test] + public void TestGetNullableTypeException() + { + Assert.Throws(() => Avro.CodeGen.GetNullableType(null)); } - [TestCase(@"{ -""type"": ""fixed"", -""namespace"": ""com.base"", -""name"": ""MD5"", -""size"": 16 -}", null, null, "com.base")] - [TestCase(@"{ -""type"": ""fixed"", -""namespace"": ""com.base"", -""name"": ""MD5"", -""size"": 16 -}", "com.base", "SchemaTest", "SchemaTest")] - [TestCase(@"{ -""type"": ""fixed"", -""namespace"": ""com.base"", -""name"": ""MD5"", -""size"": 16 -}", "miss", "SchemaTest", "com.base")] - public void TestCodeGenNamespaceMapping(string str, string avroNamespace, string csharpNamespace, - string expectedNamespace) + [Test] + public void TestReservedKeywords() { - Schema schema = Schema.Parse(str); + // https://github.com/dotnet/roslyn/blob/main/src/Compilers/CSharp/Portable/Syntax/SyntaxKindFacts.cs - var codegen = new CodeGen(); - codegen.AddSchema(schema); - - if (avroNamespace != null && csharpNamespace != null) + // Check if all items in CodeGenUtil.Instance.ReservedKeywords are keywords + foreach (string keyword in CodeGenUtil.Instance.ReservedKeywords) { - codegen.NamespaceMapping[avroNamespace] = csharpNamespace; + Assert.That(SyntaxFacts.GetKeywordKind(keyword) != SyntaxKind.None, Is.True); } - var results = GenerateAssembly(codegen); - foreach(var type in results.CompiledAssembly.GetTypes()) + // Check if all Roslyn defined keywords are in CodeGenUtil.Instance.ReservedKeywords + foreach (SyntaxKind keywordKind in SyntaxFacts.GetReservedKeywordKinds()) { - Assert.AreEqual(expectedNamespace, type.Namespace); + Assert.That(CodeGenUtil.Instance.ReservedKeywords, Does.Contain(SyntaxFacts.GetText(keywordKind))); } + + // If this test fails, CodeGenUtil.ReservedKeywords list must be updated. + // This might happen if newer version of C# language defines new reserved keywords. } - private static CompilerResults GenerateSchema(Schema schema) + [TestCase("a", "a")] + [TestCase("a.b", "a.b")] + [TestCase("a.b.c", "a.b.c")] + [TestCase("int", "@int")] + [TestCase("a.long.b", "a.@long.b")] + [TestCase("int.b.c", "@int.b.c")] + [TestCase("a.b.int", "a.b.@int")] + [TestCase("int.long.while", "@int.@long.@while")] // Reserved keywords + [TestCase("a.value.partial", "a.value.partial")] // Contextual keywords + [TestCase("a.value.b.int.c.while.longpartial", "a.value.b.@int.c.@while.longpartial")] // Reserved and contextual keywords + public void TestMangleUnMangle(string input, string mangled) { - var codegen = new CodeGen(); - codegen.AddSchema(schema); - return GenerateAssembly(codegen); + // Mangle + Assert.That(CodeGenUtil.Instance.Mangle(input), Is.EqualTo(mangled)); + // Unmangle + Assert.That(CodeGenUtil.Instance.UnMangle(mangled), Is.EqualTo(input)); } - private static CompilerResults GenerateAssembly(CodeGen schema) + [TestFixture] + public class CodeGenTestClass : Avro.CodeGen { - var compileUnit = schema.GenerateCode(); + [Test] + public void TestGenerateNamesException() + { + Protocol protocol = null; + Assert.Throws(() => this.GenerateNames(protocol)); + } + + + [Test] + public void GetTypesShouldReturnTypes() + { + AddSchema(@" +{ + ""name"": ""PlanetEnum"", + ""namespace"": ""Space.Models"", + ""type"": ""enum"", + ""symbols"": [ + ""Earth"", + ""Mars"", + ""Jupiter"", + ""Saturn"", + ""Uranus"", + ""Neptune"" + ] +} +"); + GenerateCode(); + var types = GetTypes(); + Assert.That(types.Count, Is.EqualTo(1)); + bool hasPlanetEnumCode = types.TryGetValue("PlanetEnum", out string planetEnumCode); + Assert.That(hasPlanetEnumCode); + Assert.That(Regex.Matches(planetEnumCode, "public enum PlanetEnum").Count, Is.EqualTo(1)); + } - var comparam = new CompilerParameters(new string[] { "netstandard.dll" }); - comparam.ReferencedAssemblies.Add("System.dll"); - comparam.ReferencedAssemblies.Add(Path.Combine(TestContext.CurrentContext.TestDirectory, "Avro.dll")); - comparam.GenerateInMemory = true; - var ccp = new CSharpCodeProvider(); - var units = new[] { compileUnit }; - var compres = ccp.CompileAssemblyFromDom(comparam, units); - if (compres.Errors.Count > 0) + [Test] + public void EnumWithKeywordSymbolsShouldHavePrefixedSymbols() { - for (int i = 0; i < compres.Errors.Count; i++) - Console.WriteLine(compres.Errors[i]); + AddSchema(@"{ + ""type"": ""enum"", + ""symbols"": [ + ""string"", + ""integer"", + ""float"", + ""boolean"", + ""list"", + ""dict"", + ""regex"" + ], + ""name"": ""type"", + ""namespace"": ""com.example"" +}"); + GenerateCode(); + var types = GetTypes(); + Assert.That(types.Count, Is.EqualTo(1)); + bool hasTypeCode = types.TryGetValue("type", out string typeCode); + Assert.That(hasTypeCode); + Assert.That(Regex.Matches(typeCode, "public enum type").Count, Is.EqualTo(1)); + Assert.That(Regex.Matches(typeCode, "@string,").Count, Is.EqualTo(1)); + Assert.That(Regex.Matches(typeCode, "@float,").Count, Is.EqualTo(1)); } - Assert.AreEqual(0, compres.Errors.Count); - return compres; } -#endif } } diff --git a/lang/csharp/src/apache/test/File/FileTests.cs b/lang/csharp/src/apache/test/File/FileTests.cs index 9229bf46974..abb3f9c6076 100644 --- a/lang/csharp/src/apache/test/File/FileTests.cs +++ b/lang/csharp/src/apache/test/File/FileTests.cs @@ -18,6 +18,7 @@ using System; using System.Collections; using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.IO.Compression; using System.Linq; @@ -34,32 +35,113 @@ public class FileTests const string specificSchema = "{\"type\":\"record\",\"name\":\"Foo\",\"namespace\":\"Avro.Test.File\",\"fields\":" + "[{\"name\":\"name\",\"type\":[\"null\",\"string\"]},{\"name\":\"age\",\"type\":\"int\"}]}"; + /// + /// This test case added to confirm standalone serialization / deserialization behavior of new type UnknownLogicalType + /// + const string unknowLogicalTypeSchema = @" +{ + ""type"" : ""record"", + ""name"" : ""Foo"", + ""namespace"" : ""Avro.Test.File"", + ""fields"": [ + { + ""name"" :""name"", + ""type"": [ + ""null"", + { + ""logicalType"": ""varchar"", + ""maxLength"": 65, + ""type"": ""string"" + } + ] + }, + { + ""name"" : ""age"", + ""type"" : ""int"" + } + ] +} +"; + + private static IEnumerable TestSpecificDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData(specificSchema, new object[] + { + new object[] { "John", 23 } + }, codecType).SetName("{m}(Case0,{2})"); + + yield return new TestCaseData(specificSchema, new object[] + { + new object[] { "John", 23 }, + new object[] { "Jane", 99 }, + new object[] { "Jeff", 88 } + }, codecType).SetName("{m}(Case1,{2})"); + + yield return new TestCaseData(specificSchema, new object[] + { + new object[] { "John", 23 }, + new object[] { "Jane", 99 }, + new object[] { "Jeff", 88 }, + new object[] { "James", 13 }, + new object[] { "June", 109 }, + new object[] { "Lloyd", 18 }, + new object[] {"Jenny", 3}, + new object[] { "Bob", 9 }, + new object[] { null, 48 } + }, codecType).SetName("{m}(Case2,{2})"); + + yield return new TestCaseData(specificSchema, new object[] + { + new object[] { "John", 23}, + new object[] { "Jane", 99 }, + new object[] { "Jeff", 88 }, + new object[] { "James", 13 }, + new object[] { "June", 109 }, + new object[] { "Lloyd", 18 }, + new object[] { "Jamie", 53 }, + new object[] { "Fanessa", 101 }, + new object[] { "Kan", 18 }, + new object[] { "Janey", 33 }, + new object[] { "Deva", 102 }, + new object[] { "Gavin", 28 }, + new object[] { "Lochy", 113 }, + new object[] { "Nickie", 10 }, + new object[] { "Liddia", 38 }, + new object[] { "Fred", 3 }, + new object[] { "April", 17 }, + new object[] { "Novac", 48 }, + new object[] { "Idan", 33 }, + new object[] { "Jolyon", 76 }, + new object[] { "Ant", 68 }, + new object[] { "Ernie", 43 }, + new object[] { "Joel", 99 }, + new object[] { "Dan", 78 }, + new object[] { "Dave", 103 }, + new object[] { "Hillary", 79 }, + new object[] { "Grant", 88 }, + new object[] { "JJ", 14 }, + new object[] { "Bill", 90 }, + new object[] { "Larry", 4 }, + new object[] { "Jenny", 3 }, + new object[] { "Bob", 9 }, + new object[] { null, 48 } + }, codecType).SetName("{m}(Case3,{2})"); + + yield return new TestCaseData(unknowLogicalTypeSchema, new object[] + { + new object[] { "John", 23 } + }, codecType).SetName("{m}(Case4,{2})"); + } + } + /// /// Reading & writing of specific (custom) record objects /// - /// /// /// - [TestCase(specificSchema, new object[] { new object[] { "John", 23 } }, Codec.Type.Deflate, TestName = "TestSpecificData0")] - [TestCase(specificSchema, new object[] { new object[] { "Jane", 23 } }, Codec.Type.Deflate, TestName = "TestSpecificData1")] - [TestCase(specificSchema, new object[] { new object[] { "John", 23 }, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 } }, Codec.Type.Deflate, TestName = "TestSpecificData2")] - [TestCase(specificSchema, new object[] { new object[] {"John", 23}, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 }, - new object[] {"James", 13}, new object[] { "June", 109 }, new object[] { "Lloyd", 18 }, - new object[] {"Jenny", 3}, new object[] { "Bob", 9 }, new object[] { null, 48 }}, Codec.Type.Deflate, TestName = "TestSpecificData3")] - [TestCase(specificSchema, new object[] { new object[] { "John", 23 } }, Codec.Type.Null, TestName = "TestSpecificData4")] - [TestCase(specificSchema, new object[] { new object[] { "Jane", 23 } }, Codec.Type.Null, TestName = "TestSpecificData5")] - [TestCase(specificSchema, new object[] { new object[] { "John", 23 }, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 } }, Codec.Type.Null, TestName = "TestSpecificData6")] - [TestCase(specificSchema, new object[] { new object[] {"John", 23}, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 }, - new object[] {"James", 13}, new object[] { "June", 109 }, new object[] { "Lloyd", 18 }, - new object[] {"Jamie", 53}, new object[] { "Fanessa", 101 }, new object[] { "Kan", 18 }, - new object[] {"Janey", 33}, new object[] { "Deva", 102 }, new object[] { "Gavin", 28 }, - new object[] {"Lochy", 113}, new object[] { "Nickie", 10 }, new object[] { "Liddia", 38 }, - new object[] {"Fred", 3}, new object[] { "April", 17 }, new object[] { "Novac", 48 }, - new object[] {"Idan", 33}, new object[] { "Jolyon", 76 }, new object[] { "Ant", 68 }, - new object[] {"Ernie", 43}, new object[] { "Joel", 99 }, new object[] { "Dan", 78 }, - new object[] {"Dave", 103}, new object[] { "Hillary", 79 }, new object[] { "Grant", 88 }, - new object[] {"JJ", 14}, new object[] { "Bill", 90 }, new object[] { "Larry", 4 }, - new object[] {"Jenny", 3}, new object[] { "Bob", 9 }, new object[] { null, 48 }}, Codec.Type.Null, TestName = "TestSpecificData7")] + [TestCaseSource(nameof(TestSpecificDataSource))] public void TestSpecificData(string schemaStr, object[] recs, Codec.Type codecType) { // create and write out @@ -95,6 +177,41 @@ public void TestSpecificData(string schemaStr, object[] recs, Codec.Type codecTy } } + private static IEnumerable TestAppendSpecificDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData(specificSchema, + new object[] + { + new object[] { "John", 23 } + }, + new object[] + { + new object[] { "Jane", 21 } + }, codecType).SetName("{m}(Case0,{3})"); + + yield return new TestCaseData(specificSchema, + new object[] + { + new object[] { "John", 23 }, + new object[] { "Jane", 99 }, + new object[] { "Jeff", 88 }, + new object[] { "James", 13 }, + new object[] { "June", 109 }, + new object[] { "Lloyd", 18 }, + new object[] { "Jenny", 3 }, + new object[] { "Bob", 9 }, + new object[] { null, 48 } + }, + new object[] + { + new object[] { "Hillary", 79 }, + new object[] { "Grant", 88 } + }, codecType).SetName("{m}(Case1,{3})"); + } + } + /// /// Test appending of specific (custom) record objects /// @@ -102,18 +219,7 @@ public void TestSpecificData(string schemaStr, object[] recs, Codec.Type codecTy /// initial records /// append records /// initial compression codec type - [TestCase(specificSchema, new object[] { new object[] { "John", 23 } }, new object[] { new object[] { "Jane", 21 } }, Codec.Type.Deflate, TestName = "TestAppendSpecificData0")] - [TestCase(specificSchema, new object[] { new object[] { "John", 23 } }, new object[] { new object[] { "Jane", 21 } }, Codec.Type.Null, TestName = "TestAppendSpecificData1")] - [TestCase(specificSchema, new object[] { new object[] {"John", 23}, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 }, - new object[] {"James", 13}, new object[] { "June", 109 }, new object[] { "Lloyd", 18 }, - new object[] {"Jenny", 3}, new object[] { "Bob", 9 }, new object[] { null, 48 }}, - new object[] { new object[] { "Hillary", 79 }, - new object[] { "Grant", 88 } }, Codec.Type.Deflate, TestName = "TestAppendSpecificData2")] - [TestCase(specificSchema, new object[] { new object[] {"John", 23}, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 }, - new object[] {"James", 13}, new object[] { "June", 109 }, new object[] { "Lloyd", 18 }, - new object[] {"Jenny", 3}, new object[] { "Bob", 9 }, new object[] { null, 48 }}, - new object[] { new object[] { "Hillary", 79 }, - new object[] { "Grant", 88 } }, Codec.Type.Null, TestName = "TestAppendSpecificData3")] + [TestCaseSource(nameof(TestAppendSpecificDataSource))] public void TestAppendSpecificData(string schemaStr, object[] recs, object[] appendRecs, Codec.Type codecType) { IList records = MakeRecords(recs); @@ -161,84 +267,50 @@ public void TestAppendSpecificData(string schemaStr, object[] recs, object[] app } } + private static IEnumerable TestGenericDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"null\"}]}", new object[] { "f1", null }, codecType) + .SetName("{m}(null,{2})"); + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", new object[] { "f1", true }, codecType) + .SetName("{m}(true,{2})"); + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", new object[] { "f1", false }, codecType) + .SetName("{m}(false,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"int\"}]}", new object[] { "f1", 101 }, codecType) + .SetName("{m}(int,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"long\"}]}", new object[] { "f1", 101L }, codecType) + .SetName("{m}(long,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"float\"}]}", new object[] { "f1", 101.78f }, codecType) + .SetName("{m}(float,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"double\"}]}", new object[] { "f1", 101.78 }, codecType) + .SetName("{m}(double,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}]}", new object[] { "f1", "A" }, codecType) + .SetName("{m}(string,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"bytes\"}]}", new object[] { "f1", new byte[] { 0, 1 } }, codecType) + .SetName("{m}(bytes,{2})"); ; + } + } + /// /// Reading & writing of generic record objects /// /// /// /// - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"null\"}]}", - new object[] { "f1", null }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", - new object[] { "f1", true }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", - new object[] { "f1", false }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"int\"}]}", - new object[] { "f1", 101 }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"long\"}]}", - new object[] { "f1", 101L }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"float\"}]}", - new object[] { "f1", 101.78f }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"double\"}]}", - new object[] { "f1", 101.78 }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}]}", - new object[] { "f1", "A" }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"bytes\"}]}", - new object[] { "f1", new byte[] { 0, 1 } }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"enum\", \"name\": \"e\", \"symbols\":[\"s1\", \"s2\"]}}]}", - new object[] { "f1", "s2" }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"array\", \"items\": \"int\"}}]}", - new object[] { "f1", new object[] { 0, 1, 101 } }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"array\", \"items\": \"int\"}}]}", - new object[] { "f1", new int[] { 0, 1, 101 } }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", - new object[] { "f1", 100 }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", - new object[] { "f1", 100L }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"fixed\", \"name\": \"f\", \"size\": 2}}]}", - new object[] { "f1", new byte[] { 1, 2 } }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"null\"}]}", - new object[] { "f1", null }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", - new object[] { "f1", true }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", - new object[] { "f1", false }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"int\"}]}", - new object[] { "f1", 101 }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"long\"}]}", - new object[] { "f1", 101L }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"float\"}]}", - new object[] { "f1", 101.78f }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"double\"}]}", - new object[] { "f1", 101.78 }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}]}", - new object[] { "f1", "A" }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"bytes\"}]}", - new object[] { "f1", new byte[] { 0, 1 } }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"enum\", \"name\": \"e\", \"symbols\":[\"s1\", \"s2\"]}}]}", - new object[] { "f1", "s2" }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"array\", \"items\": \"int\"}}]}", - new object[] { "f1", new object[] { 0, 1, 101 } }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"array\", \"items\": \"int\"}}]}", - new object[] { "f1", new int[] { 0, 1, 101 } }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", - new object[] { "f1", 100 }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", - new object[] { "f1", 100L }, Codec.Type.Null)] + [TestCaseSource(nameof(TestGenericDataSource))] public void TestGenericData(string schemaStr, object[] value, Codec.Type codecType) { - foreach(var rwFactory in GenericOptions()) + foreach (var rwFactory in GenericOptions()) { // Create and write out MemoryStream dataFileOutputStream = new MemoryStream(); @@ -260,7 +332,23 @@ public void TestGenericData(string schemaStr, object[] value, Codec.Type codecTy } Assert.IsTrue((readFoos != null && readFoos.Count > 0), - string.Format(@"Generic object: {0} did not serialise/deserialise correctly", readFoos)); + string.Format(@"Generic object: {0} did not serialize/deserialize correctly", readFoos)); + } + } + + private static IEnumerable TestAppendGenericDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", new object[] { "f1", true }, new object[] { "f1", false }, codecType) + .SetName("{m}(bool,{3})"); + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"int\"}]}", new object[] { "f1", 1 }, new object[] { "f1", 2 }, codecType) + .SetName("{m}(int,{3})"); + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}]}", new object[] { "f1", "A" }, new object[] { "f1", "B" }, codecType) + .SetName("{m}(string,{3})"); } } @@ -270,11 +358,8 @@ public void TestGenericData(string schemaStr, object[] value, Codec.Type codecTy /// schema /// initial records /// append records - /// innitial compression codec type - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", - new object[] { "f1", true }, new object[] { "f1", false }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"int\"}]}", - new object[] { "f1", 1 }, new object[] { "f1", 2 }, Codec.Type.Null)] + /// initial compression codec type + [TestCaseSource(nameof(TestAppendGenericDataSource))] public void TestAppendGenericData(string schemaStr, object[] recs, object[] appendRecs, Codec.Type codecType) { foreach (var rwFactory in GenericOptions()) @@ -311,7 +396,7 @@ public void TestAppendGenericData(string schemaStr, object[] recs, object[] appe Assert.NotNull(readFoos); Assert.AreEqual((recs.Length + appendRecs.Length) / 2, readFoos.Count, - $"Generic object: {readFoos} did not serialise/deserialise correctly"); + $"Generic object: {readFoos} did not serialize/deserialize correctly"); } } @@ -346,9 +431,7 @@ public void OpenAppendWriter_IncorrectOutStream_Throws() /// DeflateStream as it is a standard non-seekable Stream that has the same behavior as the /// NetworkStream, which we should handle. /// - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", - new object[] { "f1", 100L }, Codec.Type.Null)] + [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", new object[] { "f1", 100L }, Codec.Type.Null)] public void TestNonSeekableStream(string schemaStr, object[] value, Codec.Type codecType) { foreach (var rwFactory in GenericOptions()) @@ -383,7 +466,35 @@ public void TestNonSeekableStream(string schemaStr, object[] value, Codec.Type c } Assert.IsTrue((readFoos != null && readFoos.Count > 0), - string.Format(@"Generic object: {0} did not serialise/deserialise correctly", readFoos)); + string.Format(@"Generic object: {0} did not serialize/deserialize correctly", readFoos)); + } + } + + private static IEnumerable TestPrimitiveDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData("[\"boolean\", \"null\"]", null, codecType); + yield return new TestCaseData("[\"boolean\", \"null\"]", true, codecType); + yield return new TestCaseData("[\"int\", \"long\"]", 100, codecType); + yield return new TestCaseData("[\"int\", \"long\"]", 100L, codecType); + yield return new TestCaseData("[\"float\", \"double\"]", 100.75, codecType); + yield return new TestCaseData("[\"float\", \"double\"]", 23.67f, codecType); + yield return new TestCaseData("{\"type\": \"boolean\"}", true, codecType); + yield return new TestCaseData("{\"type\": \"boolean\"}", false, codecType); + yield return new TestCaseData("{\"type\": \"string\"}", "John", codecType); + yield return new TestCaseData("{\"type\": [\"null\",\"string\"]}", null, codecType); + yield return new TestCaseData("{\"type\": \"int\"}", 1, codecType); + yield return new TestCaseData("{\"type\": \"long\"}", 12312313123L, codecType); + yield return new TestCaseData("{\"type\": \"float\"}", 0.0f, codecType); + yield return new TestCaseData("{\"type\": \"double\"}", 0.0, codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", new float[] { 23.67f, 22.78f }, codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", 100.89, codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", "a", codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", new string[] { "a", "b" }, codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new byte[] { 1, 2, 3 }, codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new object[] { new byte[] { 1, 2 }, new byte[] { 3, 4 } }, codecType); + yield return new TestCaseData("[{\"type\": \"enum\", \"symbols\": [\"s1\", \"s2\"], \"name\": \"e\"}, \"string\"]", "h1", codecType); } } @@ -393,51 +504,7 @@ public void TestNonSeekableStream(string schemaStr, object[] value, Codec.Type c /// /// /// - [TestCase("{\"type\": \"boolean\"}", true, Codec.Type.Deflate)] - [TestCase("{\"type\": \"boolean\"}", false, Codec.Type.Deflate)] - [TestCase("{\"type\": \"boolean\"}", true, Codec.Type.Null)] - [TestCase("{\"type\": \"boolean\"}", false, Codec.Type.Null)] - [TestCase("[\"boolean\", \"null\"]", null, Codec.Type.Deflate)] - [TestCase("[\"boolean\", \"null\"]", true, Codec.Type.Deflate)] - [TestCase("[\"int\", \"long\"]", 100, Codec.Type.Deflate)] - [TestCase("[\"int\", \"long\"]", 100L, Codec.Type.Deflate)] - [TestCase("[\"float\", \"double\"]", 100.75, Codec.Type.Deflate)] - [TestCase("[\"float\", \"double\"]", 23.67f, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", new float[] { 23.67f, 22.78f }, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", 100.89, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", "a", Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", new string[] { "a", "b" }, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new byte[] { 1, 2, 3 }, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new object[] { new byte[] { 1, 2 }, new byte[] { 3, 4 } }, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"enum\", \"symbols\": [\"s1\", \"s2\"], \"name\": \"e\"}, \"string\"]", "h1", Codec.Type.Deflate)] - [TestCase("{\"type\":\"string\"}", "John", Codec.Type.Deflate)] - [TestCase("{\"type\":[\"null\",\"string\"]}", null, Codec.Type.Deflate)] - [TestCase("{\"type\":\"int\"}", 1, Codec.Type.Deflate)] - [TestCase("{\"type\":\"boolean\"}", false, Codec.Type.Deflate)] - [TestCase("{\"type\":\"long\"}", 12312313123L, Codec.Type.Deflate)] - [TestCase("{\"type\":\"float\"}", 0.0f, Codec.Type.Deflate)] - [TestCase("{\"type\":\"double\"}", 0.0, Codec.Type.Deflate)] - [TestCase("[\"boolean\", \"null\"]", null, Codec.Type.Null)] - [TestCase("[\"boolean\", \"null\"]", true, Codec.Type.Null)] - [TestCase("[\"int\", \"long\"]", 100, Codec.Type.Null)] - [TestCase("[\"int\", \"long\"]", 100L, Codec.Type.Null)] - [TestCase("[\"float\", \"double\"]", 100.75, Codec.Type.Null)] - [TestCase("[\"float\", \"double\"]", 23.67f, Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", new float[] { 23.67f, 22.78f }, Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", 100.89, Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", "a", Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", new string[] { "a", "b" }, Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new byte[] { 1, 2, 3 }, Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new object[] { new byte[] { 1, 2 }, new byte[] { 3, 4 } }, Codec.Type.Null)] - [TestCase("[{\"type\": \"enum\", \"symbols\": [\"s1\", \"s2\"], \"name\": \"e\"}, \"string\"]", "h1", Codec.Type.Null)] - [TestCase("{\"type\":\"string\"}", "John", Codec.Type.Null)] - [TestCase("{\"type\":[\"null\",\"string\"]}", null, Codec.Type.Null)] - [TestCase("{\"type\":\"int\"}", 1, Codec.Type.Null)] - [TestCase("{\"type\":\"boolean\"}", false, Codec.Type.Null)] - [TestCase("{\"type\":\"long\"}", 12312313123L, Codec.Type.Null)] - [TestCase("{\"type\":\"float\"}", 0.0f, Codec.Type.Null)] - [TestCase("{\"type\":\"double\"}", 0.0, Codec.Type.Null)] - [TestCase("{\"type\":\"string\"}", "test", Codec.Type.Null)] + [TestCaseSource(nameof(TestPrimitiveDataSource))] public void TestPrimitiveData(string schemaStr, object value, Codec.Type codecType) { foreach(var rwFactory in GenericOptions()) @@ -455,32 +522,28 @@ public void TestPrimitiveData(string schemaStr, object value, Codec.Type codecTy } } + private static IEnumerable TestMetaDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + foreach (bool useTypeGetter in new bool[] { true, false }) + { + yield return new TestCaseData("bytesTest", new byte[] { 1, 2, 3 }, codecType, useTypeGetter); + yield return new TestCaseData("stringTest", "testVal", codecType, useTypeGetter); + yield return new TestCaseData("longTest", 12312313123L, codecType, useTypeGetter); + yield return new TestCaseData("bytesTest", new byte[] { 1 }, codecType, useTypeGetter); + yield return new TestCaseData("longTest", -1211212L, codecType, useTypeGetter); + } + } + } + /// /// Reading & writing of header meta data /// - /// /// /// - [TestCase("bytesTest", new byte[] { 1, 2, 3 }, Codec.Type.Null, true)] - [TestCase("stringTest", "testVal", Codec.Type.Null, true)] - [TestCase("longTest", 12312313123L, Codec.Type.Null, true)] - [TestCase("bytesTest", new byte[] { 1 }, Codec.Type.Null, true)] - [TestCase("longTest", -1211212L, Codec.Type.Null, true)] - [TestCase("bytesTest", new byte[] { 1, 2, 3 }, Codec.Type.Deflate, true)] - [TestCase("stringTest", "testVal", Codec.Type.Deflate, true)] - [TestCase("longTest", 12312313123L, Codec.Type.Deflate, true)] - [TestCase("bytesTest", new byte[] { 1 }, Codec.Type.Deflate, true)] - [TestCase("longTest", -21211212L, Codec.Type.Deflate, true)] - [TestCase("bytesTest", new byte[] { 1, 2, 3 }, Codec.Type.Null, false)] - [TestCase("stringTest", "testVal", Codec.Type.Null, false)] - [TestCase("longTest", 12312313123L, Codec.Type.Null, false)] - [TestCase("bytesTest", new byte[] { 1 }, Codec.Type.Null, false)] - [TestCase("longTest", -1211212L, Codec.Type.Null, false)] - [TestCase("bytesTest", new byte[] { 1, 2, 3 }, Codec.Type.Deflate, false)] - [TestCase("stringTest", "testVal", Codec.Type.Deflate, false)] - [TestCase("longTest", 12312313123L, Codec.Type.Deflate, false)] - [TestCase("bytesTest", new byte[] { 1 }, Codec.Type.Deflate, false)] - [TestCase("longTest", -21211212L, Codec.Type.Deflate, false)] + /// + [TestCaseSource(nameof(TestMetaDataSource))] public void TestMetaData(string key, object value, Codec.Type codecType, bool useTypeGetter) { // create and write out @@ -507,19 +570,27 @@ public void TestMetaData(string key, object value, Codec.Type codecType, bool us } } + private static IEnumerable TestPartialReadSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData(specificSchema, codecType, 0, 330).SetName("{m}({1},{2},{3})"); + yield return new TestCaseData(specificSchema, codecType, 1, 330).SetName("{m}({1},{2},{3})"); + yield return new TestCaseData(specificSchema, codecType, 135, 330).SetName("{m}({1},{2},{3})"); + yield return new TestCaseData(specificSchema, codecType, 194, 264).SetName("{m}({1},{2},{3})"); + } + + // This is only for Null codec + yield return new TestCaseData(specificSchema, Codec.Type.Null, 888, 165).SetName("{m}({1},{2},{3})"); + } + /// /// Partial reading of file / stream from /// position in stream /// /// - /// /// - [TestCase(specificSchema, Codec.Type.Null, 1, 330)] // 330 - [TestCase(specificSchema, Codec.Type.Null, 135, 330)] // 330 - [TestCase(specificSchema, Codec.Type.Null, 194, 264)] // 264 - [TestCase(specificSchema, Codec.Type.Null, 235, 264)] // 264 - [TestCase(specificSchema, Codec.Type.Null, 888, 165)] // 165 - [TestCase(specificSchema, Codec.Type.Null, 0, 330)] // 330 + [TestCaseSource(nameof(TestPartialReadSource))] public void TestPartialRead(string schemaStr, Codec.Type codecType, int position, int expectedRecords) { // create and write out @@ -569,11 +640,9 @@ public void TestPartialRead(string schemaStr, Codec.Type codecType, int position /// Tests reading from sync boundaries. /// /// - /// /// - [TestCase(specificSchema, Codec.Type.Null)] - [TestCase(specificSchema, Codec.Type.Deflate)] - public void TestPartialReadAll(string schemaStr, Codec.Type codecType) + [Test] + public void TestPartialReadAll([Values(specificSchema)] string schemaStr, [Values] Codec.Type codecType) { // create and write out IList records = MakeRecords(GetTestFooObject()); @@ -636,14 +705,11 @@ public void TestPartialReadAll(string schemaStr, Codec.Type codecType) /// Test leaveOpen flag /// /// - /// /// - /// - [TestCase(specificSchema, Codec.Type.Null, true, false)] - [TestCase(specificSchema, Codec.Type.Null, true, true)] - [TestCase(specificSchema, Codec.Type.Null, false, false)] - [TestCase(specificSchema, Codec.Type.Null, false, true)] - public void TestLeaveOpen(string schemaStr, Codec.Type codecType, bool leaveWriteOpen, bool leaveReadOpen) + /// + /// + [Test] + public void TestLeaveOpen([Values(specificSchema)] string schemaStr, [Values] Codec.Type codecType, [Values] bool leaveWriteOpen, [Values] bool leaveReadOpen) { // create and write out IList records = MakeRecords(GetTestFooObject()); @@ -723,19 +789,23 @@ private static void AssertNumRecordsFromPosition( IFileReader reader, long Assert.AreEqual( expectedRecords, readRecords, "didn't read expected records from position " + position ); } + private static IEnumerable TestSyncAndSeekPositionsSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData(specificSchema, codecType, 2, 0, 1).SetName("{m}({1},{2},{3},{4})"); + yield return new TestCaseData(specificSchema, codecType, 10, 1, 4).SetName("{m}({1},{2},{3},{4})"); + yield return new TestCaseData(specificSchema, codecType, 200, 111, 15).SetName("{m}({1},{2},{3},{4})"); + yield return new TestCaseData(specificSchema, codecType, 1000, 588, 998).SetName("{m}({1},{2},{3},{4})"); + } + } + /// /// Reading all sync positions and /// verifying them with subsequent seek /// positions /// - [TestCase(specificSchema, Codec.Type.Null, 2, 0, 1)] - [TestCase(specificSchema, Codec.Type.Null, 10, 1, 4)] - [TestCase(specificSchema, Codec.Type.Null, 200, 111, 15)] - [TestCase(specificSchema, Codec.Type.Null, 1000, 588, 998)] - [TestCase(specificSchema, Codec.Type.Deflate, 2, 0, 1)] - [TestCase(specificSchema, Codec.Type.Deflate, 10, 1, 4)] - [TestCase(specificSchema, Codec.Type.Deflate, 200, 111, 15)] - [TestCase(specificSchema, Codec.Type.Deflate, 1000, 588, 998)] + [TestCaseSource(nameof(TestSyncAndSeekPositionsSource))] public void TestSyncAndSeekPositions(string schemaStr, Codec.Type codecType, int iterations, int firstSyncPosition, int secondSyncPosition) { // create and write out @@ -775,7 +845,7 @@ public void TestSyncAndSeekPositions(string schemaStr, Codec.Type codecType, int } } - // verify syncs wth seeks + // verify syncs with seeks reader.Sync(0); // first sync Assert.AreEqual(reader.PreviousSync(), syncs[0], string.Format("Error syncing reader to position: {0}", syncs[0])); @@ -819,6 +889,130 @@ public void TestDifferentReaderSchema() } } + /// + /// Reading & writing many specific record objects + /// + /// + /// + [Test] + public void TestLargeSpecificData([Values] Codec.Type codecType, [Values(0, 1000, 100000)] int numOfRecords) + { + foreach (var rwFactory in SpecificOptions()) + { + MemoryStream dataFileOutputStream = new MemoryStream(); + Schema schema = Schema.Parse(specificSchema); + using (IFileWriter dataFileWriter = rwFactory.CreateWriter(dataFileOutputStream, schema, Codec.CreateCodec(codecType))) + { + for (int index = 0; index < numOfRecords; index++) + { + dataFileWriter.Append(new Foo() { name = $"Name-{index}", age = index }); + } + } + + MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray()); + + // Read back and verify + using (IFileReader reader = rwFactory.CreateReader(dataFileInputStream, null)) + { + int index = 0; + foreach (Foo record in reader.NextEntries) + { + Assert.AreEqual($"Name-{index}", record.name); + Assert.AreEqual(index, record.age); + index++; + } + + Assert.AreEqual(numOfRecords, index); + } + } + } + + /// + /// Reading and writing using optional codecs + /// + /// + /// + [TestCase("zstd", true)] + [TestCase("deflate", false)] + [TestCase("null", false)] + [TestCase("snappy", false)] + [TestCase("bzip2", false)] + [TestCase("xz", false)] + [TestCase("zstandard", false)] + public void TestOptionalCodecs(string codecToUse, bool expectResolverProvidedCodec) + { + var resolverProvidedCodec = false; + + var fakeCodec = new FakeZstdCodec(); + Codec codecResolver(string codecString) + { + if (codecString == "zstd") + { + resolverProvidedCodec = true; + return fakeCodec; + } + + return null; + } + + Codec.RegisterResolver(codecResolver); + + RecordSchema schema = Schema.Parse( "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}," + + "{\"name\":\"f2\", \"type\":\"string\"}]}" ) as RecordSchema; + + foreach(var rwFactory in GenericOptions()) + { + using (MemoryStream dataFileOutputStream = new MemoryStream()) + { + using (var writer = rwFactory.CreateWriter(dataFileOutputStream, schema, fakeCodec)) + { + writer.Append(mkRecord(new [] { "f1", "f1val", "f2", "f2val" }, schema)); + } + + using (var dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray())) + using (IFileReader reader = rwFactory.CreateReader(dataFileInputStream, schema)) + { + GenericRecord result = reader.Next(); + Assert.AreEqual("f1val", result["f1"]); + Assert.AreEqual("f2val", result["f2"]); + } + } + } + + Assert.AreEqual(expectResolverProvidedCodec, resolverProvidedCodec); + } + + [TestCase("")] + [TestCase("blahblahblah")] + public void UnknownCodecFromStringException(string codec) + { + Assert.Throws(typeof(AvroRuntimeException), () => Codec.CreateCodecFromString(codec)); + } + + [TestCase((Codec.Type)(-1))] // "Invalid" Codec.Type + public void UnknownCodecFromType(Codec.Type codec) + { + Assert.Throws(typeof(AvroRuntimeException), () => Codec.CreateCodec(codec)); + } + + [TestCase("deflate")] + [TestCase("null")] + [TestCase(null)] // If codec is absent, it is assumed to be "null" + [TestCase("snappy")] + [TestCase("bzip2")] + [TestCase("xz")] + [TestCase("zstandard")] + public void KnownCodecFromString(string codec) + { + Assert.NotNull(Codec.CreateCodecFromString(codec)); + } + + [Test] + public void KnownCodecFromType([Values] Codec.Type codec) + { + Assert.NotNull(Codec.CreateCodec(codec)); + } + private bool CheckPrimitive(Stream input, T value, ReaderWriterSet.ReaderFactory createReader) { IFileReader reader = createReader(input, null); @@ -1048,4 +1242,40 @@ public override string ToString() return string.Format("Name: {0}, Age: {1}", name, age); } } + + class FakeZstdCodec : Codec + { + private DeflateCodec _codec = new DeflateCodec(); + public override byte[] Compress(byte[] uncompressedData) + { + return _codec.Compress(uncompressedData); + } + + public override void Compress(MemoryStream inputStream, MemoryStream outputStream) + { + _codec.Compress(inputStream, outputStream); + } + + public override byte[] Decompress(byte[] compressedData, int length) + { + return _codec.Decompress(compressedData, length); + } + + public override bool Equals(object other) + { + if (other == null) return false; + + return this == other; + } + + public override int GetHashCode() + { + return GetName().GetHashCode(); + } + + public override string GetName() + { + return "zstd"; + } + } } diff --git a/lang/csharp/src/apache/test/Generic/GenericEnumTests.cs b/lang/csharp/src/apache/test/Generic/GenericEnumTests.cs new file mode 100644 index 00000000000..aba0038ea1a --- /dev/null +++ b/lang/csharp/src/apache/test/Generic/GenericEnumTests.cs @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Avro.Generic; +using NUnit.Framework; + +namespace Avro.test.Generic +{ + [TestFixture] + public class GenericEnumTests + { + private const string baseSchema = "{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": " + + "[\"Unknown\", \"A\", \"B\"], \"default\": \"Unknown\" }"; + + [Test] + public void TestEquals() + { + GenericEnum genericEnum = GetBaseGenericEnum(); + GenericEnum genericEnum2 = GetBaseGenericEnum(); + + Assert.IsTrue(genericEnum.Equals(genericEnum2)); + } + + [Test] + public void TestEqualsNotEqual() + { + GenericEnum genericEnum = GetBaseGenericEnum(); + GenericEnum genericEnum2 = new GenericEnum(Schema.Parse(baseSchema) as EnumSchema, "B"); + + Assert.IsFalse(genericEnum.Equals(genericEnum2)); + } + + [Test] + public void TestEqualsObject() + { + GenericEnum genericEnum = GetBaseGenericEnum(); + object genericEnum2 = genericEnum; + + Assert.IsTrue(genericEnum.Equals(genericEnum2)); + } + + [Test] + public void TestEqualsObjectNullObject() + { + GenericEnum genericEnum = GetBaseGenericEnum(); + + Assert.IsFalse(genericEnum.Equals(null)); + } + + private GenericEnum GetBaseGenericEnum() + { + GenericEnum genericEnum = new GenericEnum(Schema.Parse(baseSchema) as EnumSchema, "A"); + + return genericEnum; + } + } +} diff --git a/lang/csharp/src/apache/test/Generic/GenericRecordTests.cs b/lang/csharp/src/apache/test/Generic/GenericRecordTests.cs new file mode 100644 index 00000000000..9ae0e6f7e05 --- /dev/null +++ b/lang/csharp/src/apache/test/Generic/GenericRecordTests.cs @@ -0,0 +1,238 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Avro.Generic; +using NUnit.Framework; + +namespace Avro.test.Generic +{ + [TestFixture] + public class GenericRecordTests + { + private const string baseSchema = "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + + "[{\"name\":\"f2\",\"type\":\"int\"},{\"name\":\"f1\",\"type\":\"boolean\"}]}"; + + [Test] + public void TestAddByFieldNameThrows() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + // Field does not exist + Assert.Throws(() => { genericRecord.Add("badField", "test"); }); + } + + [Test] + public void TestAddByPosition() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + genericRecord.Add(0, 2); + + object value = genericRecord.GetValue(0); + + Assert.IsNotNull(value); + Assert.IsTrue(value is int); + Assert.AreEqual(2, (int)value); + } + + [Test] + public void TestAddByPositionThrows() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + Assert.Throws(() => { genericRecord.Add(2, 2); }); + } + + [Test] + public void TestEquals() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + GenericRecord genericRecord2 = GetBaseGenericRecord(); + + Assert.IsTrue(genericRecord.Equals(genericRecord2)); + } + + [Test] + public void TestEqualsNotEqual() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + GenericRecord genericRecord2 = GetBaseGenericRecord(); + genericRecord2.Add(0, 2); + + Assert.IsFalse(genericRecord.Equals(genericRecord2)); + } + + [Test] + public void TestEqualsObject() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + object genericRecord2 = genericRecord; + + Assert.IsTrue(genericRecord.Equals(genericRecord2)); + } + + [Test] + public void TestEqualsObjectNotEqual() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + GenericRecord genericRecord2 = GetBaseGenericRecord(); + genericRecord2.Add(0, 2); + + Assert.IsFalse(genericRecord.Equals((object)genericRecord2)); + } + + [Test] + public void TestEqualsObjectNullObject() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + Assert.IsFalse(genericRecord.Equals((object)null)); + } + + [Test] + public void TestGetHashCode() + { + int hashCode = GetBaseGenericRecord().GetHashCode(); + Assert.IsTrue(hashCode > 0); + } + + [Test] + public void TestGetValue() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + object value = genericRecord.GetValue(0); + + Assert.IsNotNull(value); + Assert.IsTrue(value is int); + Assert.AreEqual(1, (int)value); + } + + [Test] + public void TestKeyValueLookup() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + // Key Exists + object existingKey = genericRecord["f2"]; + Assert.IsNotNull(existingKey); + Assert.IsTrue(existingKey is int); + } + + [Test] + public void TestKeyValueLookupThrows() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + // Key does not exist + Assert.Throws(() => { _ = genericRecord["badField"]; }); + } + + [Test] + public void TestToString() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + string str = genericRecord.ToString(); + string expectedValue = "Schema: {\"type\":\"record\",\"name\":\"r\",\"fields\":" + + "[{\"name\":\"f2\",\"type\":\"int\"},{\"name\":\"f1\",\"type\":" + + "\"boolean\"}]}, contents: { f2: 1, f1: True, }"; + + Assert.AreEqual(expectedValue, str); + } + + + [Test] + public void TestFieldNames() + { + string schemaWithNames = "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + + "[{\"name\":\"æ­ŗäģĨ上\",\"type\":\"int\"}]}"; + + RecordSchema testSchema = Schema.Parse(schemaWithNames) as RecordSchema; + GenericRecord genericRecord = new GenericRecord(testSchema); + genericRecord.Add("æ­ŗäģĨ上", 1); + + string str = genericRecord.ToString(); + string expectedValue = "Schema: {\"type\":\"record\",\"name\":\"r\",\"fields\":" + + "[{\"name\":\"æ­ŗäģĨ上\",\"type\":\"int\"}]}, contents: { æ­ŗäģĨ上: 1, }"; + + Assert.AreEqual(expectedValue, str); + } + + [Test] + public void TestTryGetValue() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + // Value exists + bool returnResult = genericRecord.TryGetValue("f2", out object result); + + Assert.IsTrue(returnResult); + Assert.IsNotNull(result); + Assert.IsTrue(result is int); + Assert.AreEqual(1, (int)result); + } + + [Test] + public void TestTryGetValueByPosition() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + bool returnResult = genericRecord.TryGetValue(0, out object value); + + Assert.IsTrue(returnResult); + Assert.IsNotNull(value); + Assert.IsTrue(value is int); + Assert.AreEqual(1, (int)value); + } + + [Test] + public void TestTryGetValueByPositionNotFound() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + bool returnResult = genericRecord.TryGetValue(3, out object value); + + Assert.IsFalse(returnResult); + Assert.IsNull(value); + } + + [Test] + public void TestTryGetValueNotFound() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + // Value exists + bool returnResult = genericRecord.TryGetValue("badField", out object result); + + Assert.IsFalse(returnResult); + Assert.IsNull(result); + } + + private GenericRecord GetBaseGenericRecord() + { + RecordSchema testSchema = Schema.Parse(baseSchema) as RecordSchema; + GenericRecord genericRecord = new GenericRecord(testSchema); + genericRecord.Add("f2", 1); + genericRecord.Add("f1", true); + + return genericRecord; + } + } +} diff --git a/lang/csharp/src/apache/test/Generic/GenericTests.cs b/lang/csharp/src/apache/test/Generic/GenericTests.cs index 05aa5bc4944..b87ce69f890 100644 --- a/lang/csharp/src/apache/test/Generic/GenericTests.cs +++ b/lang/csharp/src/apache/test/Generic/GenericTests.cs @@ -17,16 +17,97 @@ */ using System; using System.IO; -using System.Linq; using Avro.IO; using System.Collections.Generic; +using System.Text; using Avro.Generic; using NUnit.Framework; +using Decoder = Avro.IO.Decoder; +using Encoder = Avro.IO.Encoder; namespace Avro.Test.Generic { class GenericTests { + private static string intToUtf8(int value) + { + var decimalLogicalType = new Avro.Util.Decimal(); + var logicalSchema = (LogicalSchema) + Schema.Parse(@"{ ""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4 }"); + + byte[] byteArray = (byte[])decimalLogicalType.ConvertToBaseValue(new AvroDecimal(value), logicalSchema); + + return Encoding.GetEncoding("iso-8859-1").GetString(byteArray); + } + + [Test] + public void ConvertsDecimalZeroToLogicalType() => ConvertsDefaultToLogicalType( + @"{""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4}", + @$"""{intToUtf8(0)}""", new AvroDecimal(0)); + + [Test] + public void ConvertsDecimalIntegerToLogicalType() => ConvertsDefaultToLogicalType( + @"{""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4}", + @$"""{intToUtf8(1234)}""", new AvroDecimal(1234)); + + [Test] + public void ConvertsDecimalScaledToLogicalType() => ConvertsDefaultToLogicalType( + @"{""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 3}", + @$"""{intToUtf8(1234)}""", new AvroDecimal(1.234)); + + private static IEnumerable ConvertsDefaultToLogicalTypeSource = new List() + { + new TestCaseData(@"{""type"": ""string"", ""logicalType"": ""uuid""}", @"""00000000-0000-0000-0000-000000000000""", new Guid()), + new TestCaseData(@"{""type"": ""string"", ""logicalType"": ""uuid""}", @"""00000000000000000000000000000000""", new Guid()), + new TestCaseData(@"{""type"": ""string"", ""logicalType"": ""uuid""}", @"""12345678-1234-5678-1234-123456789012""", new Guid("12345678-1234-5678-1234-123456789012")), + new TestCaseData(@"{""type"": ""string"", ""logicalType"": ""uuid""}", @"""12345678123456781234123456789012""", new Guid("12345678-1234-5678-1234-123456789012")), + new TestCaseData(@"{""type"": ""int"", ""logicalType"": ""date""}", "0", DateTime.UnixEpoch), + new TestCaseData(@"{""type"": ""int"", ""logicalType"": ""date""}", "123456", DateTime.UnixEpoch.AddDays(123456)), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""time-micros""}", "0", new TimeSpan()), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""time-micros""}", "123456", new TimeSpan(123456*TimeSpan.TicksPerMillisecond/1000)), + new TestCaseData(@"{""type"": ""int"", ""logicalType"": ""time-millis""}", "0", new TimeSpan()), + new TestCaseData(@"{""type"": ""int"", ""logicalType"": ""time-millis""}", "123456", new TimeSpan(0, 0, 0, 0, 123456)), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""timestamp-micros""}", "0", DateTime.UnixEpoch), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""timestamp-micros""}", "123456", DateTime.UnixEpoch.AddTicks(123456*TimeSpan.TicksPerMillisecond/1000)), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""timestamp-millis""}", "0", DateTime.UnixEpoch), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""timestamp-millis""}", "123456", DateTime.UnixEpoch.AddMilliseconds(123456)) + }; + + [TestCaseSource(nameof(ConvertsDefaultToLogicalTypeSource))] + public void ConvertsDefaultToLogicalType(string typeDefinition, string defaultDefinition, object expected) + { + var writerSchemaString = @"{ + ""type"": ""record"", + ""name"": ""Foo"", + ""fields"": [ + ] +}"; + + var readerSchemaString = $@"{{ + ""type"": ""record"", + ""name"": ""Foo"", + ""fields"": [ + {{ + ""name"": ""x"", + ""type"": {typeDefinition}, + ""default"": {defaultDefinition} + }} + ] +}}"; + var writerSchema = Schema.Parse(writerSchemaString); + + Stream stream; + + serialize(writerSchemaString, + MkRecord(new object[] { }, (RecordSchema)writerSchema), + out stream, + out _); + + var output = deserialize(stream, writerSchema, Schema.Parse(readerSchemaString)).GetValue(0); + + Assert.AreEqual(expected, output); + } + private static void test(string s, T value) { Stream ms; @@ -47,6 +128,18 @@ private static void test(string s, T value) [TestCase("[\"int\", \"long\"]", 100L)] [TestCase("[\"float\", \"double\"]", 100.75)] [TestCase("[\"float\", \"double\"]", 23.67f)] + [TestCase("[\"float\", \"int\"]", 0)] + [TestCase("[\"float\", \"int\"]", 0.0f)] + [TestCase("[\"float\", \"int\"]", 100)] + [TestCase("[\"float\", \"int\"]", 100.0f)] + [TestCase("[\"float\", \"int\"]", -100)] + [TestCase("[\"float\", \"int\"]", -100.0f)] + [TestCase("[\"double\", \"long\"]", 0L)] + [TestCase("[\"double\", \"long\"]", 0.0)] + [TestCase("[\"double\", \"long\"]", 100L)] + [TestCase("[\"double\", \"long\"]", 100.0)] + [TestCase("[\"double\", \"long\"]", -100L)] + [TestCase("[\"double\", \"long\"]", -100.0)] [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", new float[] { 23.67f, 22.78f })] [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", 100.89)] [TestCase("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", "a")] @@ -98,7 +191,7 @@ public void TestPrimitive(string schema, object value) new object[] { "f1", new byte[] { 1, 2 } })] public void TestRecord(string schema, object[] kv) { - test(schema, mkRecord(kv, Schema.Parse(schema) as RecordSchema)); + test(schema, MkRecord(kv, Schema.Parse(schema) as RecordSchema)); } [TestCase("{\"type\": \"map\", \"values\": \"string\"}", @@ -166,7 +259,7 @@ public void TestLogical_Decimal_Fixed() new object[] { "f1", "v1" })] public void TestUnion_record(string unionSchema, string recordSchema, object[] value) { - test(unionSchema, mkRecord(value, Schema.Parse(recordSchema) as RecordSchema)); + test(unionSchema, MkRecord(value, Schema.Parse(recordSchema) as RecordSchema)); } [TestCase("[{\"type\": \"enum\", \"symbols\": [\"s1\", \"s2\"], \"name\": \"e\"}, \"string\"]", @@ -344,8 +437,8 @@ public void TestResolution_enum() new object[] { "f1", true, "f2", "d" }, Description = "Default field")] public void TestResolution_record(string ws, object[] actual, string rs, object[] expected) { - TestResolution(ws, mkRecord(actual, Schema.Parse(ws) as RecordSchema), rs, - mkRecord(expected, Schema.Parse(rs) as RecordSchema)); + TestResolution(ws, MkRecord(actual, Schema.Parse(ws) as RecordSchema), rs, + MkRecord(expected, Schema.Parse(rs) as RecordSchema)); } [TestCase("{\"type\":\"map\",\"values\":\"int\"}", new object[] { "a", 100, "b", -202 }, @@ -419,11 +512,11 @@ public void TestResolutionMismatch_record(string ws, object[] actual, string rs, { if (expectedExceptionType != null) { - Assert.Throws(expectedExceptionType, () => { testResolutionMismatch(ws, mkRecord(actual, Schema.Parse(ws) as RecordSchema), rs); }); + Assert.Throws(expectedExceptionType, () => { testResolutionMismatch(ws, MkRecord(actual, Schema.Parse(ws) as RecordSchema), rs); }); } else { - testResolutionMismatch(ws, mkRecord(actual, Schema.Parse(ws) as RecordSchema), rs); + testResolutionMismatch(ws, MkRecord(actual, Schema.Parse(ws) as RecordSchema), rs); } } @@ -491,7 +584,7 @@ public void TestRecordEquality_arrayFieldnotEqual() "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + "[{\"name\":\"a\",\"type\":{\"type\":\"array\",\"items\":\"int\"}}]}"); - Func makeRec = arr => mkRecord(new object[] { "a", arr }, schema); + Func makeRec = arr => MkRecord(new object[] { "a", arr }, schema); var rec1 = makeRec(new[] { 69, 23 }); var rec2 = makeRec(new[] { 42, 11 }); @@ -506,7 +599,7 @@ public void TestRecordEquality_arrayFieldequal() "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + "[{\"name\":\"a\",\"type\":{\"type\":\"array\",\"items\":\"int\"}}]}"); - Func makeRec = arr => mkRecord(new object[] { "a", arr }, schema); + Func makeRec = arr => MkRecord(new object[] { "a", arr }, schema); // Intentionally duplicated so reference equality doesn't apply var rec1 = makeRec(new[] { 89, 12, 66 }); @@ -522,7 +615,7 @@ public void TestRecordEquality_mapFieldequal() "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + "[{\"name\":\"a\",\"type\":{\"type\":\"map\",\"values\":\"int\"}}]}"); - Func makeRec = value => mkRecord( + Func makeRec = value => MkRecord( new object[] { "a", new Dictionary { { "key", value } } }, schema); var rec1 = makeRec(52); @@ -538,7 +631,7 @@ public void TestRecordEquality_mapFieldnotEqual() "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + "[{\"name\":\"a\",\"type\":{\"type\":\"map\",\"values\":\"int\"}}]}"); - Func makeRec = value => mkRecord( + Func makeRec = value => MkRecord( new object[] { "a", new Dictionary { { "key", value } } }, schema); var rec1 = makeRec(69); @@ -547,7 +640,7 @@ public void TestRecordEquality_mapFieldnotEqual() Assert.AreNotEqual(rec1, rec2); } - private static GenericRecord mkRecord(object[] kv, RecordSchema s) + public static GenericRecord MkRecord(object[] kv, RecordSchema s) { GenericRecord input = new GenericRecord(s); for (int i = 0; i < kv.Length; i += 2) diff --git a/lang/csharp/src/apache/test/IO/BinaryCodecTests.cs b/lang/csharp/src/apache/test/IO/BinaryCodecTests.cs index a6a1731e2d8..a638b73fea2 100644 --- a/lang/csharp/src/apache/test/IO/BinaryCodecTests.cs +++ b/lang/csharp/src/apache/test/IO/BinaryCodecTests.cs @@ -20,6 +20,7 @@ using NUnit.Framework; using System.IO; using System.Linq; +using System.Text; using Avro.IO; namespace Avro.Test @@ -31,7 +32,7 @@ namespace Avro.Test delegate void Encode(Encoder e, T t); /// - /// Tests the BinaryEncoder and BinaryDecoder. This is pertty general set of test cases and hence + /// Tests the BinaryEncoder and BinaryDecoder. This is pretty general set of test cases and hence /// can be used for any encoder and its corresponding decoder. /// [TestFixture] @@ -214,23 +215,105 @@ public void TestString(string n, int overhead) TestSkip(n, (Decoder d) => d.SkipString(), (Encoder e, string t) => e.WriteString(t), overhead + n.Length); } -#if NETCOREAPP3_1 +#if NETCOREAPP3_1_OR_GREATER [Test] - public void TestLargeString() + public void TestStringReadIntoArrayPool() { + const int maxFastReadLength = 4096; + // Create a 16KB buffer in the Array Pool var largeBufferToSeedPool = ArrayPool.Shared.Rent(2 << 14); ArrayPool.Shared.Return(largeBufferToSeedPool); - // Create a slightly less than 16KB buffer, which will use the 16KB buffer in the pool - var n = string.Concat(Enumerable.Repeat("1234567890", 1600)); - var overhead = 3; + var n = string.Concat(Enumerable.Repeat("A", maxFastReadLength)); + var overhead = 2; TestRead(n, (Decoder d) => d.ReadString(), (Encoder e, string t) => e.WriteString(t), overhead + n.Length); - TestSkip(n, (Decoder d) => d.SkipString(), (Encoder e, string t) => e.WriteString(t), overhead + n.Length); } + + [Test] + public void TestStringReadByBinaryReader() + { + const int overhead = 2; + const int maxFastReadLength = 4096; + const int expectedStringLength = maxFastReadLength + 1; + var n = string.Concat(Enumerable.Repeat("A", expectedStringLength)); + + TestRead(n, (Decoder d) => d.ReadString(), (Encoder e, string t) => e.WriteString(t), expectedStringLength + overhead); + } +#endif + + [Test] + public void TestInvalidInputWithNegativeStringLength() + { + using (MemoryStream iostr = new MemoryStream()) + { + Encoder e = new BinaryEncoder(iostr); + + e.WriteLong(-1); + + iostr.Flush(); + iostr.Position = 0; + Decoder d = new BinaryDecoder(iostr); + + var exception = Assert.Throws(() => d.ReadString()); + + Assert.NotNull(exception); + Assert.AreEqual("Can not deserialize a string with negative length!", exception.Message); + iostr.Close(); + } + } + + [Test] + public void TestInvalidInputWithMaxIntAsStringLength() + { + using (MemoryStream iostr = new MemoryStream()) + { + Encoder e = new BinaryEncoder(iostr); + + e.WriteLong(int.MaxValue); + e.WriteBytes(Encoding.UTF8.GetBytes("SomeSmallString")); + + iostr.Flush(); + iostr.Position = 0; + Decoder d = new BinaryDecoder(iostr); + + var exception = Assert.Throws(() => d.ReadString()); + + Assert.NotNull(exception); + Assert.AreEqual("String length is not supported!", exception.Message); + iostr.Close(); + } + } + + [Test] + public void TestInvalidInputWithMaxArrayLengthAsStringLength() + { + using (MemoryStream iostr = new MemoryStream()) + { + Encoder e = new BinaryEncoder(iostr); + +#if NETCOREAPP3_1_OR_GREATER + const int maximumArrayLength = 0x7FFFFFC7; +#else + const int maximumArrayLength = 0x7FFFFFFF / 2; #endif + e.WriteLong(maximumArrayLength); + e.WriteBytes(Encoding.UTF8.GetBytes("SomeSmallString")); + + iostr.Flush(); + iostr.Position = 0; + Decoder d = new BinaryDecoder(iostr); + + var exception = Assert.Throws(() => d.ReadString()); + + Assert.NotNull(exception); + Assert.AreEqual("Could not read as many bytes from stream as expected!", exception.Message); + iostr.Close(); + } + } + [TestCase(0, 1)] [TestCase(1, 1)] [TestCase(64, 2)] diff --git a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs new file mode 100644 index 00000000000..fe2183a2f1f --- /dev/null +++ b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs @@ -0,0 +1,540 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using NUnit.Framework; +using System.IO; +using System.Linq; +using System.Text; +using Avro.Generic; +using Avro.IO; +using Avro.Specific; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; + +namespace Avro.Test +{ + using Decoder = Avro.IO.Decoder; + using Encoder = Avro.IO.Encoder; + + /// + /// Tests the JsonEncoder and JsonDecoder. + /// + [TestFixture] + public class JsonCodecTests + { + [TestCase("{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + " { \"name\" : \"f1\", \"type\": \"int\" }, " + + " { \"name\" : \"f2\", \"type\": \"float\" } " + + "] }", + "{ \"f2\": 10.4, \"f1\": 10 } ")] + [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", " \"s1\" ")] + [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", " \"s2\" ")] + [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 5 }", "\"hello\"")] + [TestCase("{ \"type\": \"array\", \"items\": \"int\" }", "[ 10, 20, 30 ]")] + [TestCase("{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": 10, \"k2\": 20, \"k3\": 30 }")] + [TestCase("[ \"int\", \"long\" ]", "{ \"int\": 10 }")] + [TestCase("\"string\"", "\"hello\"")] + [TestCase("\"bytes\"", "\"hello\"")] + [TestCase("\"int\"", "10")] + [TestCase("\"long\"", "10")] + [TestCase("\"float\"", "10.0")] + [TestCase("\"double\"", "10.0")] + [TestCase("\"boolean\"", "true")] + [TestCase("\"boolean\"", "false")] + [TestCase("\"null\"", "null")] + public void TestJsonAllTypesValidValues(String schemaStr, String value) + { + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(value, schema); + + Assert.IsTrue(JToken.DeepEquals(JToken.Parse(value), + JToken.Parse(fromAvroToJson(avroBytes, schema, true)))); + } + + [TestCase("{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + " { \"name\" : \"f1\", \"type\": \"int\" }, " + + " { \"name\" : \"f2\", \"type\": \"float\" } " + + "] }", + "{ \"f4\": 10.4, \"f3\": 10 } ")] + [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", " \"s3\" ")] + [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", "\"hello\"")] + [TestCase("{ \"type\": \"array\", \"items\": \"int\" }", "[ \"10\", \"20\", \"30\" ]")] + [TestCase("{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": \"10\", \"k2\": \"20\"}")] + [TestCase("[ \"int\", \"long\" ]", "10")] + [TestCase("\"string\"", "10")] + [TestCase("\"bytes\"", "10")] + [TestCase("\"int\"", "\"hi\"")] + [TestCase("\"long\"", "\"hi\"")] + [TestCase("\"float\"", "\"hi\"")] + [TestCase("\"double\"", "\"hi\"")] + [TestCase("\"boolean\"", "\"hi\"")] + [TestCase("\"boolean\"", "\"hi\"")] + [TestCase("\"null\"", "\"hi\"")] + public void TestJsonAllTypesInvalidValues(String schemaStr, String value) + { + Schema schema = Schema.Parse(schemaStr); + Assert.Throws(() => fromJsonToAvro(value, schema)); + } + + [TestCase("{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + " { \"name\" : \"f1\", \"type\": \"int\" }, " + + " { \"name\" : \"f2\", \"type\": \"float\" } " + + "] }", + "{ \"f2\": 10.4, \"f1")] + [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", "s1")] + [TestCase("\"string\"", "\"hi")] + public void TestJsonMalformed(String schemaStr, String value) + { + Schema schema = Schema.Parse(schemaStr); + Assert.Throws(() => fromJsonToAvro(value, schema)); + } + + [Test] + public void TestJsonEncoderWhenIncludeNamespaceOptionIsFalse() + { + string value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}"; + string schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + + "]}"; + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(value, schema); + + Assert.IsTrue(JToken.DeepEquals(JObject.Parse("{\"b\":\"myVal\",\"a\":1}"), + JObject.Parse(fromAvroToJson(avroBytes, schema, false)))); + } + + [Test] + public void TestJsonEncoderWhenIncludeNamespaceOptionIsTrue() + { + string value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}"; + string schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + + "]}"; + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(value, schema); + + Assert.IsTrue(JToken.DeepEquals(JObject.Parse("{\"b\":{\"string\":\"myVal\"},\"a\":1}"), + JObject.Parse(fromAvroToJson(avroBytes, schema, true)))); + } + + [Test] + public void TestJsonRecordOrdering() + { + string value = "{\"b\": 2, \"a\": 1}"; + Schema schema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": \"int\"}" + + "]}"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":1,\"b\":2}", fromDatumToJson(o, schema, false)); + } + + [Test] + public void TestJsonRecordOrdering2() + { + string value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; + Schema schema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true},\"b\":{\"b1\":\"h\",\"b2\":3.14,\"b3\":1.4}}", + fromDatumToJson(o, schema, false)); + } + + [Test] + public void TestJsonRecordOrderingWithProjection() + { + String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; + Schema writerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + Schema readerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" + + "]}"); + GenericDatumReader reader = new GenericDatumReader(writerSchema, readerSchema); + Decoder decoder = new JsonDecoder(writerSchema, value); + Object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true}}", + fromDatumToJson(o, readerSchema, false)); + } + + + [Test] + public void TestJsonRecordOrderingWithProjection2() + { + String value = + "{\"b\": { \"b1\": \"h\", \"b2\": [3.14, 3.56], \"b3\": 1.4}, \"a\": {\"a2\":true, \"a1\": null}}"; + Schema writerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":{\"type\":\"array\", \"items\":\"float\"}}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + + Schema readerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" + + "]}"); + + GenericDatumReader reader = new GenericDatumReader(writerSchema, readerSchema); + Decoder decoder = new JsonDecoder(writerSchema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true}}", + fromDatumToJson(o, readerSchema, false)); + } + + [TestCase("{\"int\":123}")] + [TestCase("{\"string\":\"12345678-1234-5678-1234-123456789012\"}")] + [TestCase("null")] + public void TestJsonUnionWithLogicalTypes(String value) + { + Schema schema = Schema.Parse( + "[\"null\",\n" + + " { \"type\": \"int\", \"logicalType\": \"date\" },\n" + + " { \"type\": \"string\", \"logicalType\": \"uuid\" }\n" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true)); + } + + [TestCase("{\"int\":123}")] + [TestCase("{\"com.myrecord\":{\"f1\":123}}")] + [TestCase("null")] + public void TestJsonUnionWithRecord(String value) + { + Schema schema = Schema.Parse( + "[\"null\",\n" + + " { \"type\": \"int\", \"logicalType\": \"date\" },\n" + + " {\"type\":\"record\",\"name\":\"myrecord\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": \"int\"}]}" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true)); + } + + [TestCase("int", 1)] + [TestCase("long", 1L)] + [TestCase("float", 1.0F)] + [TestCase("double", 1.0)] + public void TestJsonDecoderNumeric(string type, object value) + { + string def = "{\"type\":\"record\",\"name\":\"X\",\"fields\":" + "[{\"type\":\"" + type + + "\",\"name\":\"n\"}]}"; + Schema schema = Schema.Parse(def); + DatumReader reader = new GenericDatumReader(schema, schema); + + string[] records = { "{\"n\":1}", "{\"n\":1.0}" }; + + foreach (GenericRecord g in records.Select(r => reader.Read(null, new JsonDecoder(schema, r)))) + { + Assert.AreEqual(value, g["n"]); + } + } + + [Test] + [TestCase("float", "0", (float)0)] + [TestCase("float", "1", (float)1)] + [TestCase("float", "1.0", (float)1.0)] + [TestCase("double", "0", (double)0)] + [TestCase("double", "1", (double)1)] + [TestCase("double", "1.0", 1.0)] + [TestCase("float", "\"NaN\"", float.NaN)] + [TestCase("float", "\"Infinity\"", float.PositiveInfinity)] + [TestCase("float", "\"INF\"", float.PositiveInfinity)] + [TestCase("float", "\"-Infinity\"", float.NegativeInfinity)] + [TestCase("float", "\"-INF\"", float.NegativeInfinity)] + [TestCase("double", "\"NaN\"", double.NaN)] + [TestCase("double", "\"Infinity\"", double.PositiveInfinity)] + [TestCase("double", "\"INF\"", double.PositiveInfinity)] + [TestCase("double", "\"-Infinity\"", double.NegativeInfinity)] + [TestCase("double", "\"-INF\"", double.NegativeInfinity)] + [TestCase("float", "\"\"", null)] + [TestCase("float", "\"unknown\"", null)] + [TestCase("float", "\"nan\"", null)] + [TestCase("float", "\"infinity\"", null)] + [TestCase("float", "\"inf\"", null)] + [TestCase("float", "\"-infinity\"", null)] + [TestCase("float", "\"-inf\"", null)] + [TestCase("double", "\"\"", null)] + [TestCase("double", "\"unknown\"", null)] + [TestCase("double", "\"nan\"", null)] + [TestCase("double", "\"infinity\"", null)] + [TestCase("double", "\"inf\"", null)] + [TestCase("double", "\"-infinity\"", null)] + [TestCase("double", "\"-inf\"", null)] + [TestCase("double", "\"-inf\"", null)] + public void TestJsonDecodeFloatDouble(string typeStr, string valueStr, object expected) + { + string def = $"{{\"type\":\"record\",\"name\":\"X\",\"fields\":[{{\"type\":\"{typeStr}\",\"name\":\"Value\"}}]}}"; + Schema schema = Schema.Parse(def); + DatumReader reader = new GenericDatumReader(schema, schema); + + string record = $"{{\"Value\":{valueStr}}}"; + Decoder decoder = new JsonDecoder(schema, record); + try + { + GenericRecord r = reader.Read(null, decoder); + Assert.AreEqual(expected, r["Value"]); + } + catch (AvroTypeException) + { + if (expected != null) + { + throw; + } + } + } + + [TestCase("{ \"s\": \"1900-01-01T00:00:00Z\" }", "1900-01-01T00:00:00Z")] + [TestCase("{ \"s\": \"1900-01-01T00:00:00.0000000Z\" }", "1900-01-01T00:00:00.0000000Z")] + [TestCase("{ \"s\": \"1900-01-01T00:00:00\" }", "1900-01-01T00:00:00")] + public void TestJsonDecoderStringDates(string json, string expected) + { + string def = "{\"type\":\"record\",\"name\":\"X\",\"fields\": [{\"type\": \"string\",\"name\":\"s\"}]}"; + Schema schema = Schema.Parse(def); + DatumReader reader = new GenericDatumReader(schema, schema); + + var response = reader.Read(null, new JsonDecoder(schema, json)); + + Assert.AreEqual(expected, response["s"]); + } + + // Ensure that even if the order of fields in JSON is different from the order in schema, it works. + [Test] + public void TestJsonDecoderReorderFields() + { + String w = "{\"type\":\"record\",\"name\":\"R\",\"fields\":" + "[{\"type\":\"long\",\"name\":\"l\"}," + + "{\"type\":{\"type\":\"array\",\"items\":\"int\"},\"name\":\"a\"}" + + "]}"; + Schema ws = Schema.Parse(w); + String data = "{\"a\":[1,2],\"l\":100}"; + JsonDecoder decoder = new JsonDecoder(ws, data); + Assert.AreEqual(100, decoder.ReadLong()); + decoder.SkipArray(); + data = "{\"l\": 200, \"a\":[1,2]}"; + decoder = new JsonDecoder(ws, data); + Assert.AreEqual(200, decoder.ReadLong()); + decoder.SkipArray(); + } + + [Test] + public void TestJsonDecoderSpecificDatumWriterWithArrayAndMap() + { + Root data = new Root(); + Item item = new Item { id = 123456 }; + data.myarray = new List { item }; + data.mymap = new Dictionary { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 } }; + + DatumWriter writer = new SpecificDatumWriter(data.Schema); + + ByteBufferOutputStream bbos = new ByteBufferOutputStream(); + + Encoder encoder = new JsonEncoder(data.Schema, bbos); + writer.Write(data, encoder); + encoder.Flush(); + + List listStreams = bbos.GetBufferList(); + + using (StreamReader reader = new StreamReader(listStreams[0])) + { + String output = reader.ReadToEnd(); + Assert.AreEqual("{\"myarray\":[{\"id\":123456}],\"mymap\":{\"map\":{\"1\":1,\"2\":2,\"3\":3,\"4\":4}}}", output); + } + } + + [Test] + public void TestJsonDecoderSpecificDefaultWriterWithArrayAndMap() + { + Root data = new Root(); + Item item = new Item { id = 123456 }; + data.myarray = new List { item }; + data.mymap = new Dictionary { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 } }; + + SpecificDefaultWriter writer = new SpecificDefaultWriter(data.Schema); + + ByteBufferOutputStream bbos = new ByteBufferOutputStream(); + + Encoder encoder = new JsonEncoder(data.Schema, bbos); + writer.Write(data, encoder); + encoder.Flush(); + + List listStreams = bbos.GetBufferList(); + + using (StreamReader reader = new StreamReader(listStreams[0])) + { + String output = reader.ReadToEnd(); + Assert.AreEqual("{\"myarray\":[{\"id\":123456}],\"mymap\":{\"map\":{\"1\":1,\"2\":2,\"3\":3,\"4\":4}}}", output); + } + } + + private byte[] fromJsonToAvro(string json, Schema schema) + { + DatumReader reader = new GenericDatumReader(schema, schema); + GenericDatumWriter writer = new GenericDatumWriter(schema); + MemoryStream output = new MemoryStream(); + + Decoder decoder = new JsonDecoder(schema, json); + Encoder encoder = new BinaryEncoder(output); + + object datum = reader.Read(null, decoder); + + writer.Write(datum, encoder); + encoder.Flush(); + output.Flush(); + + return output.ToArray(); + } + + private string fromAvroToJson(byte[] avroBytes, Schema schema, bool includeNamespace) + { + GenericDatumReader reader = new GenericDatumReader(schema, schema); + + Decoder decoder = new BinaryDecoder(new MemoryStream(avroBytes)); + object datum = reader.Read(null, decoder); + return fromDatumToJson(datum, schema, includeNamespace); + } + + private string fromDatumToJson(object datum, Schema schema, bool includeNamespace) + { + DatumWriter writer = new GenericDatumWriter(schema); + MemoryStream output = new MemoryStream(); + + JsonEncoder encoder = new JsonEncoder(schema, output); + encoder.IncludeNamespace = includeNamespace; + writer.Write(datum, encoder); + encoder.Flush(); + output.Flush(); + + return Encoding.UTF8.GetString(output.ToArray()); + } + } + + public partial class Root : global::Avro.Specific.ISpecificRecord + { + public static global::Avro.Schema _SCHEMA = global::Avro.Schema.Parse( + "{\"type\":\"record\",\"name\":\"Root\",\"namespace\":\"Avro.Test\",\"fields\":[{\"name\":\"myarray" + + "\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Item\",\"namespace\":\"Avr" + + "o.Test\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}}},{\"name\":\"mymap\",\"default\":null," + + "\"type\":[\"null\",{\"type\":\"map\",\"values\":\"int\"}]}]}"); + private IList _myarray; + private IDictionary _mymap; + + public virtual global::Avro.Schema Schema + { + get { return Root._SCHEMA; } + } + + public IList myarray + { + get { return this._myarray; } + set { this._myarray = value; } + } + + public IDictionary mymap + { + get { return this._mymap; } + set { this._mymap = value; } + } + + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.myarray; + case 1: return this.mymap; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + } + } + + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: + this.myarray = (IList)fieldValue; + break; + case 1: + this.mymap = (IDictionary)fieldValue; + break; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + } + } + } + + public partial class Item : global::Avro.Specific.ISpecificRecord + { + public static global::Avro.Schema _SCHEMA = global::Avro.Schema.Parse( + "{\"type\":\"record\",\"name\":\"Item\",\"namespace\":\"Avro.Test\",\"fields\":[{\"name\":\"id\",\"ty" + + "pe\":\"long\"}]}"); + + private long _id; + + public virtual global::Avro.Schema Schema + { + get { return Item._SCHEMA; } + } + + public long id + { + get { return this._id; } + set { this._id = value; } + } + + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.id; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + } + } + + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: + this.id = (System.Int64)fieldValue; + break; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + } + } + } +} diff --git a/lang/csharp/src/apache/test/Interop/InteropDataConstants.cs b/lang/csharp/src/apache/test/Interop/InteropDataConstants.cs index 94bfb408d18..170e28eb5fa 100644 --- a/lang/csharp/src/apache/test/Interop/InteropDataConstants.cs +++ b/lang/csharp/src/apache/test/Interop/InteropDataConstants.cs @@ -25,7 +25,11 @@ public class InteropDataConstants public static readonly HashSet SupportedCodecNames = new HashSet { DataFileConstants.NullCodec, - DataFileConstants.DeflateCodec + DataFileConstants.DeflateCodec, + DataFileConstants.SnappyCodec, + DataFileConstants.BZip2Codec, + DataFileConstants.XZCodec, + DataFileConstants.ZstandardCodec }; } } \ No newline at end of file diff --git a/lang/csharp/src/apache/test/Interop/InteropDataGenerator.cs b/lang/csharp/src/apache/test/Interop/InteropDataGenerator.cs index 10c06f79480..7aa10c0e65a 100644 --- a/lang/csharp/src/apache/test/Interop/InteropDataGenerator.cs +++ b/lang/csharp/src/apache/test/Interop/InteropDataGenerator.cs @@ -86,6 +86,7 @@ static void GenerateInteropData(string schemaPath, string outputDir) var codec = Codec.CreateCodecFromString(codecName); using (var dataFileWriter = DataFileWriter.OpenWriter(datumWriter, outputPath, codec)) { + dataFileWriter.SetMeta("user_metadata", "someByteArray"); dataFileWriter.Append(record); } } diff --git a/lang/csharp/src/apache/test/Interop/InteropDataTests.cs b/lang/csharp/src/apache/test/Interop/InteropDataTests.cs index 7215df4321c..4f66a7e4459 100644 --- a/lang/csharp/src/apache/test/Interop/InteropDataTests.cs +++ b/lang/csharp/src/apache/test/Interop/InteropDataTests.cs @@ -48,6 +48,11 @@ public void TestInterop(string inputDir) using(var reader = DataFileReader.OpenReader(avroFile)) { int i = 0; + string userMetadata = reader.GetMetaString("user_metadata"); + if (userMetadata != null) + { + Assert.AreEqual("someByteArray", userMetadata); + } foreach (var record in reader.NextEntries) { i++; diff --git a/lang/csharp/src/apache/test/NuGetPackageTests.cs b/lang/csharp/src/apache/test/NuGetPackageTests.cs new file mode 100644 index 00000000000..493cede9312 --- /dev/null +++ b/lang/csharp/src/apache/test/NuGetPackageTests.cs @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.IO; +using System.IO.Compression; +using System.Linq; +using System.Xml.Linq; +using NUnit.Framework; + +namespace Avro.Test.Utils +{ + [TestFixture] + public class NuGetPackageTests + { + private static readonly string[] PackageIds = new[] + { + "Apache.Avro", + "Apache.Avro.Tools", + "Apache.Avro.File.Snappy", + "Apache.Avro.File.BZip2", + "Apache.Avro.File.XZ", + "Apache.Avro.File.Zstandard" + }; + + [TestCaseSource(nameof(PackageIds))] + public void PackageContainsSpdxLicenseExpression(string packageId) + { + var nupkgPath = FindPackageInBuildOutput(packageId); + if (nupkgPath == null) + { + Assert.Inconclusive($"Package {packageId} not found. Run 'dotnet pack --configuration Release' first."); + return; + } + + var nuspecXml = ExtractNuspecFromPackage(nupkgPath); + + // Get the namespace from the root element + var ns = nuspecXml.Root?.Name.Namespace ?? XNamespace.None; + var licenseElement = nuspecXml.Root?.Element(ns + "metadata")?.Element(ns + "license"); + + Assert.That(licenseElement, Is.Not.Null, + $"Package {packageId} does not contain a license element"); + + var licenseType = licenseElement?.Attribute("type")?.Value; + Assert.That(licenseType, Is.EqualTo("expression"), + $"Package {packageId} license type should be 'expression', but was '{licenseType}'"); + + var licenseValue = licenseElement?.Value; + Assert.That(licenseValue, Is.EqualTo("Apache-2.0"), + $"Package {packageId} should have SPDX license expression 'Apache-2.0', but was '{licenseValue}'"); + } + + [TestCaseSource(nameof(PackageIds))] + public void PackageContainsLicenseFile(string packageId) + { + var nupkgPath = FindPackageInBuildOutput(packageId); + if (nupkgPath == null) + { + Assert.Inconclusive($"Package {packageId} not found. Run 'dotnet pack --configuration Release' first."); + return; + } + + using (var archive = ZipFile.OpenRead(nupkgPath)) + { + var licenseEntry = archive.Entries.FirstOrDefault(e => + e.FullName.Equals("LICENSE", StringComparison.OrdinalIgnoreCase)); + + Assert.That(licenseEntry, Is.Not.Null, + $"Package {packageId} does not contain LICENSE file"); + + Assert.That(licenseEntry.Length, Is.GreaterThan(0), + $"Package {packageId} LICENSE file is empty"); + } + } + + [TestCaseSource(nameof(PackageIds))] + public void PackageLicenseFileContainsApacheLicense(string packageId) + { + var nupkgPath = FindPackageInBuildOutput(packageId); + if (nupkgPath == null) + { + Assert.Inconclusive($"Package {packageId} not found. Run 'dotnet pack --configuration Release' first."); + return; + } + + using (var archive = ZipFile.OpenRead(nupkgPath)) + { + var licenseEntry = archive.Entries.FirstOrDefault(e => + e.FullName.Equals("LICENSE", StringComparison.OrdinalIgnoreCase)); + + Assert.That(licenseEntry, Is.Not.Null); + + using (var stream = licenseEntry.Open()) + using (var reader = new StreamReader(stream)) + { + var content = reader.ReadToEnd(); + Assert.That(content, Does.Contain("Apache License"), + $"Package {packageId} LICENSE file does not contain Apache License text"); + Assert.That(content, Does.Contain("Version 2.0"), + $"Package {packageId} LICENSE file does not specify Version 2.0"); + } + } + } + + private string FindPackageInBuildOutput(string packageId) + { + // Find the lang/csharp root (4 levels up from test binary directory: bin/Release/net8.0 -> test -> apache -> src -> csharp) + var testDir = TestContext.CurrentContext.TestDirectory; + var csharpRoot = Path.GetFullPath(Path.Combine(testDir, "..", "..", "..", "..", "..")); + + // Search for the package in Release output directories + var pattern = $"{packageId}.*.nupkg"; + var files = Directory.GetFiles(csharpRoot, pattern, SearchOption.AllDirectories) + .Where(f => f.Contains($"{Path.DirectorySeparatorChar}Release{Path.DirectorySeparatorChar}")) + .OrderByDescending(f => System.IO.File.GetLastWriteTime(f)) + .ToArray(); + + return files.Length > 0 ? files[0] : null; + } + + private XDocument ExtractNuspecFromPackage(string nupkgPath) + { + using (var archive = ZipFile.OpenRead(nupkgPath)) + { + var nuspecEntry = archive.Entries.FirstOrDefault(e => + e.FullName.EndsWith(".nuspec", StringComparison.OrdinalIgnoreCase)); + + if (nuspecEntry == null) + { + Assert.Fail($"No .nuspec file found in package: {nupkgPath}"); + } + + using (var stream = nuspecEntry.Open()) + { + return XDocument.Load(stream); + } + } + } + } +} diff --git a/lang/csharp/src/apache/test/Reflect/TestLogicalSchema.cs b/lang/csharp/src/apache/test/Reflect/TestLogicalSchema.cs new file mode 100644 index 00000000000..c18da4068ea --- /dev/null +++ b/lang/csharp/src/apache/test/Reflect/TestLogicalSchema.cs @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.IO; +using Avro.IO; +using Avro.Reflect; +using NUnit.Framework; + +namespace Avro.test.Reflect +{ + public class TestLogicalSchema + { + [TestCase] + public void WriteAndReadObjectsWithLogicalSchemaFields_WithNullValues() + { + //Arrange + var obj = new TestObject + { + AvroDecimalNullableProperty = null, + AvroDecimalProperty = 13.42m, + GuidNullableProperty = null, + GuidProperty = Guid.NewGuid(), + DateNullableProperty = null, + DateProperty = new DateTime(2022, 05, 26, 14, 57, 24, 123), + DateTimeMicrosecondNullableProperty = null, + DateTimeMicrosecondProperty = DateTime.UtcNow, + DateTimeMillisecondNullableProperty = null, + DateTimeMillisecondProperty = DateTime.UtcNow, + TimeSpanMicrosecondNullableProperty = null, + TimeSpanMicrosecondProperty = new TimeSpan(23, 59, 59), + TimeSpanMillisecondNullableProperty = null, + TimeSpanMillisecondProperty = new TimeSpan(23, 59, 59), + }; + + var schema = Schema.Parse(SchemaJson); + var writer = new ReflectWriter(schema); + var reader = new ReflectReader(schema, schema); + var writeStream = new MemoryStream(); + var writeBinaryEncoder = new BinaryEncoder(writeStream); + + //Act + writer.Write(obj, writeBinaryEncoder); + var data = writeStream.ToArray(); + + var readStream = new MemoryStream(data); + var result = reader.Read(null, new BinaryDecoder(readStream)); + + //Assert + Assert.NotNull(result); + + Assert.IsNull(result.AvroDecimalNullableProperty); + Assert.AreEqual(obj.AvroDecimalProperty, result.AvroDecimalProperty); + + Assert.IsNull(result.GuidNullableProperty); + Assert.AreEqual(obj.GuidProperty, result.GuidProperty); + + Assert.IsNull(obj.DateNullableProperty); + Assert.AreEqual(obj.DateProperty.Date, result.DateProperty); + + Assert.IsNull(result.DateTimeMicrosecondNullableProperty); + Assert.AreEqual((obj.DateTimeMicrosecondProperty.Ticks / 10 ) * 10, result.DateTimeMicrosecondProperty.Ticks); + + Assert.IsNull(result.DateTimeMillisecondNullableProperty); + Assert.AreEqual((obj.DateTimeMillisecondProperty.Ticks / 10000) * 10000, result.DateTimeMillisecondProperty.Ticks); + + Assert.IsNull(result.TimeSpanMicrosecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMicrosecondProperty, result.TimeSpanMicrosecondProperty); + + Assert.IsNull(result.TimeSpanMillisecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMillisecondProperty, result.TimeSpanMillisecondProperty); + } + + [TestCase] + public void WriteAndReadObjectsWithLogicalSchemaFields_WithoutNullValues() + { + //Arrange + var obj = new TestObject + { + AvroDecimalNullableProperty = 136.42m, + AvroDecimalProperty = 13.42m, + GuidNullableProperty = Guid.NewGuid(), + GuidProperty = Guid.NewGuid(), + DateNullableProperty = new DateTime(2022, 05, 26, 14, 57, 24, 123), + DateProperty = new DateTime(2022, 05, 26, 14, 57, 24, 123), + DateTimeMicrosecondNullableProperty = DateTime.UtcNow, + DateTimeMicrosecondProperty = DateTime.UtcNow, + DateTimeMillisecondNullableProperty = DateTime.UtcNow, + DateTimeMillisecondProperty = DateTime.UtcNow, + TimeSpanMicrosecondNullableProperty = new TimeSpan(23, 59, 59), + TimeSpanMicrosecondProperty = new TimeSpan(23, 59, 59), + TimeSpanMillisecondNullableProperty = new TimeSpan(23, 59, 59), + TimeSpanMillisecondProperty = new TimeSpan(23, 59, 59), + }; + + var schema = Schema.Parse(SchemaJson); + var writer = new ReflectWriter(schema); + var reader = new ReflectReader(schema, schema); + var writeStream = new MemoryStream(); + var writeBinaryEncoder = new BinaryEncoder(writeStream); + + //Act + writer.Write(obj, writeBinaryEncoder); + var data = writeStream.ToArray(); + + var readStream = new MemoryStream(data); + var result = reader.Read(null, new BinaryDecoder(readStream)); + + //Assert + Assert.NotNull(result); + + Assert.NotNull(result.AvroDecimalNullableProperty); + Assert.AreEqual(obj.AvroDecimalNullableProperty, result.AvroDecimalNullableProperty); + Assert.AreEqual(obj.AvroDecimalProperty, result.AvroDecimalProperty); + + Assert.NotNull(result.GuidNullableProperty); + Assert.AreEqual(obj.GuidNullableProperty, result.GuidNullableProperty); + Assert.AreEqual(obj.GuidProperty, result.GuidProperty); + + Assert.NotNull(result.DateProperty); + Assert.AreEqual(obj.DateNullableProperty?.Date, result.DateProperty); + Assert.AreEqual(obj.DateProperty.Date, result.DateProperty); + + Assert.NotNull(result.DateTimeMicrosecondNullableProperty); + Assert.AreEqual((obj.DateTimeMicrosecondNullableProperty?.Ticks / 10) * 10, result.DateTimeMicrosecondNullableProperty?.Ticks); + Assert.AreEqual((obj.DateTimeMicrosecondProperty.Ticks / 10) * 10, result.DateTimeMicrosecondProperty.Ticks); + + Assert.NotNull(result.DateTimeMillisecondNullableProperty); + Assert.AreEqual((obj.DateTimeMillisecondNullableProperty?.Ticks / 10000) * 10000, result.DateTimeMillisecondNullableProperty?.Ticks); + Assert.AreEqual((obj.DateTimeMillisecondProperty.Ticks / 10000) * 10000, result.DateTimeMillisecondProperty.Ticks); + + Assert.NotNull(result.TimeSpanMicrosecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMicrosecondNullableProperty, result.TimeSpanMicrosecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMicrosecondProperty, result.TimeSpanMicrosecondProperty); + + Assert.NotNull(result.TimeSpanMillisecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMillisecondNullableProperty, result.TimeSpanMillisecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMillisecondProperty, result.TimeSpanMillisecondProperty); + } + + private const string SchemaJson = @" +{ + ""type"" : ""record"", + ""namespace"" : ""Avro.test.Reflect.Converters"", + ""name"" : ""TestObject"", + ""fields"" : [ + { ""name"" : ""AvroDecimalNullableProperty"" , ""type"" : [""null"", { ""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 6, ""scale"": 2 }] }, + { ""name"" : ""AvroDecimalProperty"" , ""type"" : { ""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 6, ""scale"": 2 } }, + { ""name"" : ""GuidNullableProperty"" , ""type"" : [""null"", { ""type"": ""string"", ""logicalType"": ""uuid""}] }, + { ""name"" : ""GuidProperty"" , ""type"" : { ""type"": ""string"", ""logicalType"": ""uuid""} }, + { ""name"" : ""DateNullableProperty"" , ""type"" : [""null"", { ""type"": ""int"", ""logicalType"": ""date""}] }, + { ""name"" : ""DateProperty"" , ""type"" : { ""type"": ""int"", ""logicalType"": ""date""} }, + { ""name"" : ""DateTimeMicrosecondNullableProperty"" , ""type"" : [""null"", { ""type"": ""long"", ""logicalType"": ""timestamp-micros""}] }, + { ""name"" : ""DateTimeMicrosecondProperty"" , ""type"" : { ""type"": ""long"", ""logicalType"": ""timestamp-micros""} }, + { ""name"" : ""DateTimeMillisecondNullableProperty"" , ""type"" : [""null"", { ""type"": ""long"", ""logicalType"": ""timestamp-millis""}] }, + { ""name"" : ""DateTimeMillisecondProperty"" , ""type"" : { ""type"": ""long"", ""logicalType"": ""timestamp-millis""} }, + { ""name"" : ""TimeSpanMicrosecondNullableProperty"" , ""type"" : [""null"", { ""type"": ""long"", ""logicalType"": ""time-micros""}] }, + { ""name"" : ""TimeSpanMicrosecondProperty"" , ""type"" : { ""type"": ""long"", ""logicalType"": ""time-micros""} }, + { ""name"" : ""TimeSpanMillisecondNullableProperty"" , ""type"" : [""null"", { ""type"": ""int"", ""logicalType"": ""time-millis""}] }, + { ""name"" : ""TimeSpanMillisecondProperty"" , ""type"" : { ""type"": ""int"", ""logicalType"": ""time-millis""} } + ] +} +"; + + public class TestObject + { + public AvroDecimal? AvroDecimalNullableProperty { get; set; } + public AvroDecimal AvroDecimalProperty { get; set; } + public Guid? GuidNullableProperty { get; set; } + public Guid GuidProperty { get; set; } + public DateTime? DateNullableProperty { get; set; } + public DateTime DateProperty { get; set; } + public DateTime? DateTimeMicrosecondNullableProperty { get; set; } + public DateTime DateTimeMicrosecondProperty { get; set; } + public DateTime? DateTimeMillisecondNullableProperty { get; set; } + public DateTime DateTimeMillisecondProperty { get; set; } + public TimeSpan? TimeSpanMicrosecondNullableProperty { get; set; } + public TimeSpan TimeSpanMicrosecondProperty { get; set; } + public TimeSpan? TimeSpanMillisecondNullableProperty { get; set; } + public TimeSpan TimeSpanMillisecondProperty { get; set; } + } + } +} diff --git a/lang/csharp/src/apache/test/Reflect/TestReflect.cs b/lang/csharp/src/apache/test/Reflect/TestReflect.cs index bea5ef23f9a..5cf57253978 100644 --- a/lang/csharp/src/apache/test/Reflect/TestReflect.cs +++ b/lang/csharp/src/apache/test/Reflect/TestReflect.cs @@ -40,17 +40,22 @@ class EnumResolutionRecord public EnumResolutionEnum enumType { get; set; } } + class NullableEnumResolutionRecord + { + public EnumResolutionEnum? enumType { get; set; } + } + [TestCase] public void TestEnumResolution() { Schema writerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," + - "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\": \"EnumType\", \"symbols\": [\"FIRST\", \"SECOND\"]} }]}"); + "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\": \"EnumType\", \"symbols\": [\"FIRST\", \"SECOND\"]} }]}"); var testRecord = new EnumResolutionRecord(); Schema readerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," + - "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\":" + - " \"EnumType\", \"symbols\": [\"THIRD\", \"FIRST\", \"SECOND\"]} }]}");; + "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\":" + + " \"EnumType\", \"symbols\": [\"THIRD\", \"FIRST\", \"SECOND\"]} }]}");; testRecord.enumType = EnumResolutionEnum.SECOND; // serialize @@ -61,6 +66,28 @@ public void TestEnumResolution() Assert.AreEqual( EnumResolutionEnum.SECOND, rec2.enumType ); } + [TestCase] + public void TestNullableEnumResolution() + { + Schema writerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," + + "\"fields\":[{\"name\":\"enumType\",\"type\":[\"null\", { \"type\": \"enum\", \"name\": " + + "\"EnumType\",\"symbols\": [\"THIRD\", \"FIRST\", \"SECOND\"]}] }]}"); + + var testRecord = new NullableEnumResolutionRecord(); + + Schema readerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," + + "\"fields\":[{\"name\":\"enumType\",\"type\":[\"null\", { \"type\": \"enum\", \"name\": " + + "\"EnumType\", \"symbols\": [\"THIRD\", \"FIRST\", \"SECOND\"]}] }]}"); + testRecord.enumType = EnumResolutionEnum.SECOND; + + // serialize + var stream = serialize(writerSchema, testRecord); + + // deserialize + var rec2 = deserialize(stream, writerSchema, readerSchema); + Assert.AreEqual( EnumResolutionEnum.SECOND, rec2.enumType ); + } + private static S deserialize(Stream ms, Schema ws, Schema rs) where S : class { long initialPos = ms.Position; diff --git a/lang/csharp/src/apache/test/Schema/AliasTest.cs b/lang/csharp/src/apache/test/Schema/AliasTest.cs index 422e07fc60b..b9a31bb8170 100644 --- a/lang/csharp/src/apache/test/Schema/AliasTest.cs +++ b/lang/csharp/src/apache/test/Schema/AliasTest.cs @@ -73,9 +73,8 @@ public void TestAliases(string s, bool valid) // also tests properties, defaul Assert.IsTrue(json == json2); } - catch (Exception ex) + catch (Exception) { - Console.WriteLine(ex.Message); Assert.IsFalse(valid); } } diff --git a/lang/csharp/src/apache/test/Schema/AliasesTests.cs b/lang/csharp/src/apache/test/Schema/AliasesTests.cs new file mode 100644 index 00000000000..27ad4b23efd --- /dev/null +++ b/lang/csharp/src/apache/test/Schema/AliasesTests.cs @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using NUnit.Framework; + +namespace Avro.Test +{ + [TestFixture] + public class AliasesTests + { + [TestCase] + public void TestNoNamespace() + { + CollectionAssert.AreEqual(new[] { new SchemaName("alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "name", null)); + } + + [TestCase] + public void TestTypeWithNamespace() + { + CollectionAssert.AreEqual(new[] { new SchemaName("space.alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "name", "space")); + } + + [TestCase] + public void TestTypeWithNamespaceInName() + { + CollectionAssert.AreEqual(new[] { new SchemaName("space.alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "space.name", null)); + } + + [TestCase] + public void TestAliasWithNamespace() + { + CollectionAssert.AreEqual(new[] { new SchemaName("name.alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "name.alias" }, "space.name", null)); + } + } +} diff --git a/lang/csharp/src/apache/test/Schema/ArraySchemaTests.cs b/lang/csharp/src/apache/test/Schema/ArraySchemaTests.cs new file mode 100644 index 00000000000..7b8b7d3139c --- /dev/null +++ b/lang/csharp/src/apache/test/Schema/ArraySchemaTests.cs @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using NUnit.Framework; + +namespace Avro.test +{ + [TestFixture] + public class ArraySchemaTests + { + [Test] + public void EqualsNullCheck() + { + string schemaString = "{\"type\": \"array\", \"items\": \"long\"}"; + ArraySchema nullSchema = null; + + Schema schema = Schema.Parse(schemaString); + + if (schema is ArraySchema arraySchema) + { + Assert.False(arraySchema.Equals(nullSchema)); + } + else + { + Assert.Fail("Must be an array schema"); + } + } + + [Test] + public void EqualsNotArraySchema() + { + string schemaString = "[\"string\", \"null\", \"long\"]"; + string arraySchemaString = "{\"type\": \"array\", \"items\": \"long\"}"; + ArraySchema arraySchema = Schema.Parse(arraySchemaString) as ArraySchema; + Schema schema = Schema.Parse(schemaString); + + Assert.False(arraySchema.Equals(schema)); + } + } +} diff --git a/lang/csharp/src/apache/test/Schema/SchemaNormalizationTests.cs b/lang/csharp/src/apache/test/Schema/SchemaNormalizationTests.cs index 1e670677a48..c6296395153 100644 --- a/lang/csharp/src/apache/test/Schema/SchemaNormalizationTests.cs +++ b/lang/csharp/src/apache/test/Schema/SchemaNormalizationTests.cs @@ -32,6 +32,14 @@ public class SchemaNormalizationTests private static readonly long One = -9223372036854775808; private static readonly byte[] Postfix = { 0, 0, 0, 0, 0, 0, 0, 0 }; + [Test] + public void TestLogicalType() + { + var schema = @"[""int"", {""type"": ""string"", ""logicalType"": ""uuid""}]"; + string pcf = SchemaNormalization.ToParsingForm(Schema.Parse(schema)); + Assert.AreEqual(@"[""int"",""string""]", pcf); + } + [Test, TestCaseSource("ProvideCanonicalTestCases")] public void CanonicalTest(string input, string expectedOutput) { diff --git a/lang/csharp/src/apache/test/Schema/SchemaTests.cs b/lang/csharp/src/apache/test/Schema/SchemaTests.cs index 714567003d1..309ecf4d601 100644 --- a/lang/csharp/src/apache/test/Schema/SchemaTests.cs +++ b/lang/csharp/src/apache/test/Schema/SchemaTests.cs @@ -17,9 +17,9 @@ */ using System; using System.Collections.Generic; -using System.Text; using NUnit.Framework; -using Avro; +using System.Linq; +using Avro.Util; namespace Avro.Test { @@ -69,8 +69,12 @@ public class SchemaTests typeof(SchemaParseException), Description = "No fields")] [TestCase("{\"type\":\"record\",\"name\":\"LongList\", \"fields\": \"hi\"}", typeof(SchemaParseException), Description = "Fields not an array")] - [TestCase("[{\"type\": \"record\",\"name\": \"Test\",\"namespace\":\"ns1\",\"fields\": [{\"name\": \"f\",\"type\": \"long\"}]}," + + [TestCase("[{\"type\": \"record\",\"name\": \"Test\",\"namespace\":\"ns1\",\"fields\": [{\"name\": \"f\",\"type\": \"long\"}]}," + "{\"type\": \"record\",\"name\": \"Test\",\"namespace\":\"ns2\",\"fields\": [{\"name\": \"f\",\"type\": \"long\"}]}]")] + + // Doc + [TestCase("{\"type\": \"record\",\"name\": \"Test\",\"doc\": \"Test Doc\",\"fields\": [{\"name\": \"f\",\"type\": \"long\"}]}")] + // Enum [TestCase("{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}")] [TestCase("{\"type\": \"enum\", \"name\": \"Status\", \"symbols\": \"Normal Caution Critical\"}", @@ -85,6 +89,7 @@ public class SchemaTests // Array [TestCase("{\"type\": \"array\", \"items\": \"long\"}")] [TestCase("{\"type\": \"array\",\"items\": {\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}}")] + [TestCase("{\"type\": \"array\"}", typeof(AvroTypeException), Description = "No Items")] // Map [TestCase("{\"type\": \"map\", \"values\": \"long\"}")] @@ -136,8 +141,9 @@ public void TestBasic(string s, Type expectedExceptionType = null) public void TestPrimitive(string s, Schema.Type type) { Schema sc = Schema.Parse(s); - Assert.IsTrue(sc is PrimitiveSchema); - Assert.AreEqual(type, sc.Tag); + Schema schema = PrimitiveSchema.Create(type, null); + + Assert.AreEqual(sc, schema); testEquality(s, sc); testToString(sc); @@ -163,6 +169,67 @@ private static void testToString(Schema sc) } } + private static void testToString(Schema sc, string schema) + { + try + { + //remove any excess spaces in the JSON to normalize the match with toString + schema = schema.Replace("{ ", "{") + .Replace("} ", "}") + .Replace("\" ", "\"") + .Replace(", ", ",") + .Replace(": ", ":"); + Assert.AreEqual(sc.ToString(), schema); + } + catch (Exception e) + { + throw new AvroException($"{e} : {sc}", e); + } + } + + [TestCase("{ \"type\": \"null\", \"metafield\": \"abc\" }", Schema.Type.Null)] + [TestCase("{ \"type\": \"boolean\", \"metafield\": \"abc\" }", Schema.Type.Boolean)] + [TestCase("{ \"type\": \"int\", \"metafield\": \"abc\" }", Schema.Type.Int)] + [TestCase("{ \"type\": \"long\", \"metafield\": \"abc\" }", Schema.Type.Long)] + [TestCase("{ \"type\": \"float\", \"metafield\": \"abc\" }", Schema.Type.Float)] + [TestCase("{ \"type\": \"double\", \"metafield\": \"abc\" }", Schema.Type.Double)] + [TestCase("{ \"type\": \"bytes\", \"metafield\": \"abc\" }", Schema.Type.Bytes)] + [TestCase("{ \"type\": \"string\", \"metafield\": \"abc\" }", Schema.Type.String)] + public void TestPrimitiveWithMetadata(string rawSchema, Schema.Type type) + { + Schema definedSchema = Schema.Parse(rawSchema); + Assert.IsTrue(definedSchema is PrimitiveSchema); + Assert.AreEqual(type.ToString().ToLower(), definedSchema.Name); + Assert.AreEqual(type, definedSchema.Tag); + + testEquality(rawSchema, definedSchema); + testToString(definedSchema); + + Assert.True(definedSchema.ToString().Contains("metafield")); + + var rawRecordSchema = "{\"type\":\"record\",\"name\":\"Foo\"," + + "\"fields\":[{\"name\":\"f1\",\"type\":" + rawSchema + + "}]}"; + Schema baseRecordSchema = Schema.Parse(rawRecordSchema); + Assert.AreEqual(Schema.Type.Record, baseRecordSchema.Tag); + RecordSchema recordSchema = baseRecordSchema as RecordSchema; + Assert.IsNotNull(recordSchema); + Assert.AreEqual(1, recordSchema.Count); + + Assert.IsTrue(recordSchema["f1"].Schema is PrimitiveSchema); + Assert.AreEqual(type.ToString().ToLower(), recordSchema["f1"].Schema.Name); + Assert.AreEqual(type, recordSchema["f1"].Schema.Tag); + + testEquality(rawRecordSchema, baseRecordSchema); + testToString(recordSchema["f1"].Schema); + + Assert.True(baseRecordSchema.ToString().Contains("metafield")); + Assert.True(recordSchema["f1"].Schema.ToString().Contains("metafield")); + + Assert.True(definedSchema.Equals(recordSchema["f1"].Schema)); + Assert.AreEqual(definedSchema.GetHashCode(), recordSchema["f1"].Schema.GetHashCode()); + } + [TestCase("{\"type\":\"record\",\"name\":\"LongList\"," + "\"fields\":[{\"name\":\"f1\",\"type\":\"long\"}," + "{\"name\":\"f2\",\"type\": \"int\"}]}", @@ -218,25 +285,190 @@ public void TestRecordDoc(string s, string expectedDoc) var rs = Schema.Parse(s) as RecordSchema; Assert.IsNotNull(rs); Assert.AreEqual(expectedDoc, rs.Documentation); + + var roundTrip = Schema.Parse(rs.ToString()) as RecordSchema; + + Assert.IsNotNull(roundTrip); + Assert.AreEqual(expectedDoc, roundTrip.Documentation); } - [TestCase("{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}", + [TestCase("{\"type\":\"record\",\"name\":\"Longs\",\"fields\":[{\"name\":\"value\",\"default\":\"100\",\"type\":\"long\",\"aliases\":[\"oldName\"]}]}", + "Longs", null, null, null, + new[] { "value" }, new[] { Schema.Type.Long }, new[] { "100" }, new[] { "oldName" }, new string[] { null })] + [TestCase("{\"type\":\"record\",\"name\":\"Longs\",\"fields\":[{\"name\":\"value\",\"doc\":\"Field With Documentation\",\"default\":\"100\",\"type\":\"long\",\"aliases\":[\"oldName\"]}]}", + "Longs", null, null, null, + new[] { "value" }, new[] { Schema.Type.Long }, new[] { "100" }, new[] { "oldName" }, new string[] { "Field With Documentation" })] + [TestCase("{\"type\":\"record\",\"name\":\"Longs\",\"namespace\":\"space\",\"fields\":[{\"name\":\"value\",\"default\":\"100\",\"type\":\"long\",\"aliases\":[\"oldName\"]}]}", + "Longs", "space", null, null, + new[] { "value" }, new[] { Schema.Type.Long }, new[] { "100" }, new[] { "oldName" }, new string[] { null })] + [TestCase("{\"type\":\"record\",\"name\":\"Longs\",\"doc\":\"Record with alias\",\"namespace\":\"space\",\"aliases\":[\"space.RecordAlias\"],\"fields\":[{\"name\":\"value\",\"default\":\"100\",\"type\":\"long\",\"aliases\":[\"oldName\"]}]}", + "Longs", "space", "RecordAlias", "Record with alias", + new[] { "value" }, new[] { Schema.Type.Long }, new[] { "100" }, new[] { "oldName" }, new string[] { null })] + [TestCase("{\"type\":\"record\",\"name\":\"Longs\",\"doc\":\"Record with two fields\",\"namespace\":\"space\",\"aliases\":[\"space.RecordAlias\"],\"fields\":[{\"name\":\"value\",\"doc\":\"first field\",\"default\":\"100\",\"type\":\"long\",\"aliases\":[\"oldName\"]},{\"name\":\"field2\",\"default\":\"true\",\"type\":\"boolean\"}]}", + "Longs", "space", "RecordAlias", "Record with two fields", + new[] { "value", "field2" }, new[] { Schema.Type.Long, Schema.Type.Boolean }, new[] { "100", "true" }, new[] { "oldName", null }, new string[] { "first field", null })] + public void TestRecordCreation(string expectedSchema, string name, string space, string alias, string documentation, string[] fieldsNames, Schema.Type[] fieldsTypes, object[] fieldsDefaultValues, string[] fieldsAliases, string[] fieldsDocs) + { + IEnumerable recordFields = fieldsNames.Select((fieldName, i) => new Field(PrimitiveSchema.Create(fieldsTypes[i]), + fieldName, + fieldsAliases[i] == null? null: new List { fieldsAliases[i] }, + i, + fieldsDocs[i], + fieldsDefaultValues[i].ToString(), + Field.SortOrder.ignore, + null)); + + string[] aliases = alias == null ? null : new[] { alias }; + + RecordSchema recordSchema = RecordSchema.Create(name, recordFields.ToList(), space, aliases, null, documentation); + + for(int i = 0; i < fieldsNames.Length; i++) + { + var fieldByName = recordSchema[fieldsNames[i]]; + if (fieldsAliases[i] != null) + { + recordSchema.TryGetFieldAlias(fieldsAliases[i], out Field fieldByAlias); + + Assert.AreSame(fieldByAlias, fieldByName); + } + Assert.AreEqual(expectedSchema, recordSchema.ToString()); + Assert.AreEqual(fieldsNames[i], fieldByName.Name); + Assert.AreEqual(i, fieldByName.Pos); + Assert.AreEqual(fieldsTypes[i], fieldByName.Schema.Tag); + Assert.AreEqual(fieldsDocs[i], fieldByName.Documentation); + Assert.AreEqual(fieldsDefaultValues[i], fieldByName.DefaultValue.ToString()); + CollectionAssert.AreEqual(fieldsAliases[i] == null? null: new[] {fieldsAliases[i]}, fieldByName.Aliases); + } + } + + [TestCase] + public void TestRecordCreationWithDuplicateFields() + { + var recordField = new Field(PrimitiveSchema.Create(Schema.Type.Long), + "value", + new List { "oldName" }, + 0, + null, + "100", + Field.SortOrder.ignore, + null); + + Assert.Throws(() => RecordSchema.Create("Longs", + new List + { + recordField, + recordField + })); + } + + [TestCase] + public void TestRecordFieldNames() { + var fields = new List + { + new Field(PrimitiveSchema.Create(Schema.Type.Long), + "æ­ŗäģĨ上", + null, + 0, + null, + null, + Field.SortOrder.ignore, + null) + }; + var recordSchema = RecordSchema.Create("LongList", fields, null, new[] { "LinkedLongs" }); + + Field f = recordSchema.Fields[0]; + Assert.AreEqual("æ­ŗäģĨ上", f.Name); + } + + [TestCase] + public void TestRecordCreationWithRecursiveRecord() + { + string schema = "{\"type\":\"record\",\"name\":\"LongList\",\"aliases\":[\"LinkedLongs\"],\"fields\":[{\"name\":\"value\",\"type\":\"long\"},{\"name\":\"next\",\"type\":[\"null\",\"LongList\"]}]}"; + + var recordSchema = RecordSchema.Create("LongList", new List(), null, new[] { "LinkedLongs" }); + + recordSchema.Fields = new List + { + new Field(PrimitiveSchema.Create(Schema.Type.Long), + "value", + null, + 0, + null, + null, + Field.SortOrder.ignore, + null), + new Field(UnionSchema.Create( + new List + { + PrimitiveSchema.Create(Schema.Type.Null), recordSchema + }), + "next", + 1) + }; + + Assert.AreEqual(schema, recordSchema.ToString()); + } + + [TestCase] + public void TestRecordWithNamedReference() + { + string nestedSchema = "{\"name\":\"NestedRecord\",\"type\":\"record\",\"fields\":[{\"name\":\"stringField\",\"type\":\"string\"}]}"; + // The root schema references the nested schema above by name only. + // This mimics tools that allow schemas to have references to other schemas. + string rootSchema = "{\"name\":\"RootRecord\",\"type\":\"record\",\"fields\":[{\"name\": \"nestedField\",\"type\":\"NestedRecord\"}]}"; + + NamedSchema nestedRecord = (NamedSchema) Schema.Parse(nestedSchema); + + SchemaNames names = new SchemaNames(); + names.Add(nestedRecord.SchemaName, nestedRecord); + + // Pass the schema names when parsing the root schema and its reference. + RecordSchema rootRecord = (RecordSchema) Schema.Parse(rootSchema, names); + Assert.AreEqual("RootRecord", rootRecord.Name); + Assert.AreEqual("NestedRecord", rootRecord.Fields[0].Schema.Name); + } + + [TestCase("{\"type\":\"enum\",\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}", + new string[] { "A", "B" })] + + [TestCase("{\"type\":\"enum\",\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}", new string[] { "A", "B" })] - public void TestEnum(string s, string[] symbols) + [TestCase("{\"type\":\"enum\",\"name\":\"Test\",\"doc\":\"Some explanation\",\"namespace\":\"mynamespace\",\"aliases\":[\"mynamespace.Alias\"],\"symbols\":[\"UNKNOWN\",\"A\",\"B\"],\"default\":\"UNKNOWN\",\"propertyKey\":\"propertyValue\"}", + new string[] { "UNKNOWN", "A", "B" }, "mynamespace", new string[] { "Alias" }, "Some explanation", true, "UNKNOWN")] + [TestCase("{\"type\":\"enum\",\"name\":\"Test\",\"doc\":\"Some explanation\",\"namespace\":\"space\",\"aliases\":[\"internalNamespace.Alias\"],\"symbols\":[\"UNKNOWN\",\"A\",\"B\"]}", + new string[] { "UNKNOWN", "A", "B" }, "space", new string[] { "internalNamespace.Alias" }, "Some explanation")] + [TestCase("{\"type\":\"enum\",\"name\":\"Test\",\"doc\":\"Some explanation\",\"namespace\":\"space\",\"aliases\":[\"internalNamespace.Alias\"],\"symbols\":[]}", + new string[] { }, "space", new string[] { "internalNamespace.Alias" }, "Some explanation")] + + public void TestEnum(string s, string[] symbols, string space = null, IEnumerable aliases = null, string doc = null, bool? usePropertyMap = null, string defaultSymbol = null) { Schema sc = Schema.Parse(s); + + PropertyMap propertyMap = new PropertyMap(); + propertyMap.Add("propertyKey", "\"propertyValue\""); + Schema schema = EnumSchema.Create("Test", + symbols, + space, + aliases, + usePropertyMap == true ? propertyMap : null, + doc, + defaultSymbol); + + Assert.AreEqual(sc, schema); + Assert.AreEqual(s, schema.ToString()); + Assert.AreEqual(Schema.Type.Enumeration, sc.Tag); EnumSchema es = sc as EnumSchema; Assert.AreEqual(symbols.Length, es.Count); int i = 0; - foreach (String str in es) + foreach (string str in es) { Assert.AreEqual(symbols[i++], str); } testEquality(s, sc); - testToString(sc); + testToString(sc, s); } [TestCase("{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}", null)] @@ -250,7 +482,7 @@ public void TestEnumDoc(string s, string expectedDoc) } [TestCase("{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"Unknown\", \"A\", \"B\"], \"default\": \"Unknown\" }", "Unknown")] - public void TestEnumDefault(string s, string expectedToken) + public void TestEnumDefault(string s, string expectedToken) { var es = Schema.Parse(s) as EnumSchema; Assert.IsNotNull(es); @@ -263,18 +495,66 @@ public void TestEnumDefaultSymbolDoesntExist(string s) Assert.Throws(() => Schema.Parse(s)); } + [TestCase("name", new string[] { "A", "B" }, "s", new[] { "L1", "L2" }, "regular enum", null, "name", "s")] + [TestCase("s.name", new string[] { "A", "B" }, null, new[] { "L1", "L2" }, "internal namespace", null, "name", "s")] + [TestCase("name", new string[] { "A", "B" }, null, new[] { "L1", "L2" }, "no namespace", null, "name", null)] + [TestCase("name", new string[] { "A", "B" }, null, new[] { "L1", "L2" }, "with default value", "A", "name", null)] + [TestCase("name", new string[] { "A1B2", "B4324" }, null, new[] { "L1", "L2" }, "with longer symbols", "B4324", "name", null)] + [TestCase("name", new string[] { "_A1B2_", "B4324" }, null, new[] { "L1", "L2" }, "underscore in symbols", "_A1B2_", "name", null)] + public void TestEnumCreation(string name, string[] symbols, string space, string[] aliases, string doc, string defaultSymbol, string expectedName, string expectedNamespace) + { + EnumSchema enumSchema = EnumSchema.Create(name, symbols, space, aliases, null, doc, defaultSymbol); + + Assert.AreEqual(expectedName, enumSchema.Name); + CollectionAssert.AreEqual(symbols, enumSchema.Symbols); + Assert.AreEqual(expectedNamespace, enumSchema.Namespace); + Assert.AreEqual(Schema.Type.Enumeration, enumSchema.Tag); + Assert.AreEqual(doc, enumSchema.Documentation); + Assert.AreEqual(defaultSymbol, enumSchema.Default); + } + + [TestCase(new[] {"A", "B"}, "C")] + [TestCase(new[] {null, "B"}, null)] + [TestCase(new[] {"", "B" }, null)] + [TestCase(new[] {"8", "B" }, null)] + [TestCase(new[] {"8", "B" }, null)] + [TestCase(new[] {"A", "A" }, null)] + [TestCase(new[] {" ", "A" }, null)] + [TestCase(new[] {"9A23", "A" }, null)] + public void TestEnumInvalidSymbols(string[] symbols, string defaultSymbol) + { + Assert.Throws(() => EnumSchema.Create("name", symbols, defaultSymbol: defaultSymbol)); + } + [TestCase("{\"type\": \"array\", \"items\": \"long\"}", "long")] public void TestArray(string s, string item) { Schema sc = Schema.Parse(s); Assert.AreEqual(Schema.Type.Array, sc.Tag); - ArraySchema ars = sc as ArraySchema; + ArraySchema ars = (ArraySchema)sc; Assert.AreEqual(item, ars.ItemSchema.Name); testEquality(s, sc); testToString(sc); } + [TestCase] + public void TestArrayCreation() + { + PrimitiveSchema itemsSchema = PrimitiveSchema.Create(Schema.Type.String); + ArraySchema arraySchema = ArraySchema.Create(itemsSchema); + + Assert.AreEqual("array", arraySchema.Name); + Assert.AreEqual(Schema.Type.Array, arraySchema.Tag); + Assert.AreEqual(itemsSchema, arraySchema.ItemSchema); + } + + [TestCase] + public void TestInvalidArrayCreation() + { + Assert.Throws(() => ArraySchema.Create(null)); + } + [TestCase("{\"type\": \"int\", \"logicalType\": \"date\"}", "int", "date")] public void TestLogicalPrimitive(string s, string baseType, string logicalType) { @@ -288,12 +568,80 @@ public void TestLogicalPrimitive(string s, string baseType, string logicalType) testToString(sc); } + // Make sure unknown type is carried thru to LogicalTypeName [TestCase("{\"type\": \"int\", \"logicalType\": \"unknown\"}", "unknown")] public void TestUnknownLogical(string s, string unknownType) { - var err = Assert.Throws(() => Schema.Parse(s)); + var schema = Schema.Parse(s); + Assert.IsNotNull(schema); + Assert.IsInstanceOf(typeof(LogicalSchema), schema); + + if (schema is LogicalSchema logicalSchema) + { + Assert.IsInstanceOf(typeof(UnknownLogicalType), logicalSchema.LogicalType); + Assert.AreEqual(logicalSchema.LogicalTypeName, unknownType); + } + else + { + Assert.Fail("Parsed schema was not a LogicalSchema"); + } + } + + /* + { + "fields": [ + { + "default": 0, + "name": "firstField", + "type": "int" + }, + { + "default": null, + "name": "secondField", + "type": [ + "null", + { + "logicalType": "varchar", + "maxLength": 65, + "type": "string" + } + ] + } + ], + "name": "sample_schema", + "type": "record" + } + */ + + // Before Change will throw Avro.AvroTypeException: 'Logical type 'varchar' is not supported.' + // Per AVRO Spec (v1.8.0 - v1.11.1) ... Logical Types Section + // Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. + [TestCase("{\"fields\": [{\"default\": 0,\"name\": \"firstField\",\"type\": \"int\"},{\"default\": null,\"name\": \"secondField\",\"type\": [\"null\",{\"logicalType\": \"varchar\",\"maxLength\": 65,\"type\": \"string\"}]}],\"name\": \"sample_schema\",\"type\": \"record\"}")] + public void TestUnknownLogicalType(string schemaText) + { + var schema = Avro.Schema.Parse(schemaText); + Assert.IsNotNull(schema); - Assert.AreEqual("Logical type '" + unknownType + "' is not supported.", err.Message); + var secondField = ((RecordSchema)schema).Fields.FirstOrDefault(f => f.Name == @"secondField"); + Assert.IsNotNull(secondField); + + var secondFieldSchema = (secondField).Schema; + Assert.IsNotNull(secondFieldSchema); + + var secondFieldUnionSchema = (UnionSchema)secondFieldSchema; + Assert.IsNotNull(secondFieldUnionSchema); + + var props = secondFieldUnionSchema.Schemas.Where(s => s.Props != null).ToList(); + Assert.IsNotNull(props); + Assert.IsTrue(props.Count == 1); + + var prop = props[0]; + // Confirm that the unknown logical type is ignored and the underlying AVRO type is used + Assert.IsTrue(prop.Name == @"string"); + var logicalSchema = prop as LogicalSchema; + Assert.IsInstanceOf(typeof(UnknownLogicalType), logicalSchema.LogicalType); + + Assert.AreEqual(logicalSchema.LogicalTypeName, @"varchar"); } [TestCase("{\"type\": \"map\", \"values\": \"long\"}", "long")] @@ -301,33 +649,82 @@ public void TestMap(string s, string value) { Schema sc = Schema.Parse(s); Assert.AreEqual(Schema.Type.Map, sc.Tag); - MapSchema ms = sc as MapSchema; + MapSchema ms = (MapSchema)sc; Assert.AreEqual(value, ms.ValueSchema.Name); testEquality(s, sc); testToString(sc); } - [TestCase("[\"string\", \"null\", \"long\"]", new string[] { "string", "null", "long" })] - public void TestUnion(string s, string[] types) + [TestCase] + public void TestMapCreation() + { + PrimitiveSchema mapType = PrimitiveSchema.Create(Schema.Type.Float); + MapSchema mapSchema = MapSchema.CreateMap(mapType); + + Assert.AreEqual("map", mapSchema.Fullname); + Assert.AreEqual("map", mapSchema.Name); + Assert.AreEqual(Schema.Type.Map, mapSchema.Tag); + Assert.AreEqual(mapType, mapSchema.ValueSchema); + } + + [TestCase] + public void TestInvalidMapCreation() + { + Assert.Throws(() => MapSchema.CreateMap(null)); + } + + [TestCase("[\"string\", \"null\", \"long\"]", + new Schema.Type[] { Schema.Type.String, Schema.Type.Null, Schema.Type.Long })] + public void TestUnion(string s, Schema.Type[] types) { Schema sc = Schema.Parse(s); + + UnionSchema schema = UnionSchema.Create(types.Select(t => (Schema)PrimitiveSchema.Create(t)).ToList()); + Assert.AreEqual(sc, schema); + Assert.AreEqual(Schema.Type.Union, sc.Tag); - UnionSchema us = sc as UnionSchema; + UnionSchema us = (UnionSchema)sc; Assert.AreEqual(types.Length, us.Count); for (int i = 0; i < us.Count; i++) { - Assert.AreEqual(types[i], us[i].Name); + Assert.AreEqual(types[i].ToString().ToLower(), us[i].Name); } testEquality(s, sc); testToString(sc); } - [TestCase("{ \"type\": \"fixed\", \"name\": \"Test\", \"size\": 1}", 1)] + [TestCase] + public void TestUnionCreation() + { + UnionSchema unionSchema = UnionSchema.Create(new List { PrimitiveSchema.Create(Schema.Type.Null), PrimitiveSchema.Create(Schema.Type.String) }); + + CollectionAssert.AreEqual(new List { PrimitiveSchema.Create(Schema.Type.Null), PrimitiveSchema.Create(Schema.Type.String) }, + unionSchema.Schemas); + } + + [TestCase] + public void TestUnionCreationWithDuplicateSchemas() + { + Assert.Throws(() => UnionSchema.Create(new List { PrimitiveSchema.Create(Schema.Type.String), PrimitiveSchema.Create(Schema.Type.String) })); + } + + [TestCase] + public void TestUnionNestedUnionCreation() + { + Assert.Throws(() => UnionSchema.Create(new List { UnionSchema.Create(new List()), PrimitiveSchema.Create(Schema.Type.String) })); + } + + [TestCase("{\"type\":\"fixed\",\"name\":\"Test\",\"size\":1}", 1)] public void TestFixed(string s, int size) { Schema sc = Schema.Parse(s); + FixedSchema schema = FixedSchema.Create("Test", 1); + + Assert.AreEqual(sc, schema); + Assert.AreEqual(s, schema.ToString()); + Assert.AreEqual(Schema.Type.Fixed, sc.Tag); FixedSchema fs = sc as FixedSchema; Assert.AreEqual(size, fs.Size); @@ -345,10 +742,23 @@ public void TestFixedDoc(string s, string expectedDoc) Assert.AreEqual(expectedDoc, fs.Documentation); } + [TestCase] + public void TestFixedCreation() + { + string s = @"{""type"":""fixed"",""name"":""fixedName"",""namespace"":""space"",""aliases"":[""space.fixedOldName""],""size"":10}"; + + FixedSchema fixedSchema = FixedSchema.Create("fixedName", 10, "space", new[] { "fixedOldName" }, null); + + Assert.AreEqual("fixedName", fixedSchema.Name); + Assert.AreEqual("space.fixedName", fixedSchema.Fullname); + Assert.AreEqual(10, fixedSchema.Size); + Assert.AreEqual(s, fixedSchema.ToString()); + } + [TestCase("a", "o.a.h", ExpectedResult = "o.a.h.a")] public string testFullname(string s1, string s2) { - var name = new SchemaName(s1, s2, null); + var name = new SchemaName(s1, s2, null, null); return name.Fullname; } @@ -367,5 +777,43 @@ public void TestUnionSchemaWithoutTypeProperty(string schemaJson, string expecte var schema = Schema.Parse(schemaJson); Assert.AreEqual(schema.ToString(), expectedSchemaJson); } + + [TestFixture] + public class SchemaTypeExtensionsTests + { + [TestCase("null", Schema.Type.Null)] + [TestCase("boolean", Schema.Type.Boolean)] + [TestCase("int", Schema.Type.Int)] + [TestCase("long", Schema.Type.Long)] + [TestCase("float", Schema.Type.Float)] + [TestCase("double", Schema.Type.Double)] + [TestCase("bytes", Schema.Type.Bytes)] + [TestCase("string", Schema.Type.String)] + [TestCase("record", Schema.Type.Record)] + [TestCase("enumeration", Schema.Type.Enumeration)] + [TestCase("array", Schema.Type.Array)] + [TestCase("map", Schema.Type.Map)] + [TestCase("union", Schema.Type.Union)] + [TestCase("fixed", Schema.Type.Fixed)] + [TestCase("error", Schema.Type.Error)] + [TestCase("logical", Schema.Type.Logical)] + [TestCase("Logical", null)] + [TestCase("InvalidValue", null)] + [TestCase("\"null\"", null)] + [TestCase("", null)] + [TestCase(null, null)] + public void ParseTypeTest(string value, object expectedResult) + { + Assert.AreEqual(Schema.ParseType(value), expectedResult); + } + + [TestCase("\"null\"", Schema.Type.Null)] + [TestCase("\"nu\"ll\"", null)] + [TestCase("\"\"", null)] + public void ParseTypeRemoveQuotesTest(string value, object expectedResult) + { + Assert.AreEqual(Schema.ParseType(value, true), expectedResult); + } + } } } diff --git a/lang/csharp/src/apache/test/Specific/DoubleLongUnionRecord.cs b/lang/csharp/src/apache/test/Specific/DoubleLongUnionRecord.cs new file mode 100644 index 00000000000..97b94be7eed --- /dev/null +++ b/lang/csharp/src/apache/test/Specific/DoubleLongUnionRecord.cs @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// ------------------------------------------------------------------------------ +// +// Generated by avrogen, version 1.11.0.0 +// Changes to this file may cause incorrect behavior and will be lost if code +// is regenerated +// +// ------------------------------------------------------------------------------ +namespace Avro.Test.Specific +{ + using System; + using System.Collections.Generic; + using System.Text; + using Avro; + using Avro.Specific; + + public partial class DoubleLongUnionRecord : ISpecificRecord + { + public static Schema _SCHEMA = Avro.Schema.Parse("{\"type\":\"record\",\"name\":\"DoubleLongUnionRecord\",\"namespace\":\"Avro.Test.Specific\",\"fields\":[{\"name" + + "\":\"Property\",\"type\":[\"double\",\"long\"]}]}"); + private object _Property; + public virtual Schema Schema + { + get + { + return DoubleLongUnionRecord._SCHEMA; + } + } + public object Property + { + get + { + return this._Property; + } + set + { + this._Property = value; + } + } + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.Property; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + }; + } + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: this.Property = (System.Object)fieldValue; break; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + }; + } + } +} diff --git a/lang/csharp/src/apache/test/Specific/RecordWithOptionalLogicalType.cs b/lang/csharp/src/apache/test/Specific/RecordWithOptionalLogicalType.cs new file mode 100644 index 00000000000..585032e9bce --- /dev/null +++ b/lang/csharp/src/apache/test/Specific/RecordWithOptionalLogicalType.cs @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// ------------------------------------------------------------------------------ +// +// Generated by avrogen, version 1.11.0.0 +// Changes to this file may cause incorrect behavior and will be lost if code +// is regenerated +// +// ------------------------------------------------------------------------------ +namespace Avro.Test.Specific.@return +{ + using System; + using System.Collections.Generic; + using System.Text; + using Avro; + using Avro.Specific; + + public partial class RecordWithOptionalLogicalType : ISpecificRecord + { + public static Schema _SCHEMA = Avro.Schema.Parse("{\"type\":\"record\",\"name\":\"RecordWithOptionalLogicalType\",\"namespace\":\"Avro.Test.Sp" + + "ecific.return\",\"fields\":[{\"name\":\"x\",\"default\":10,\"type\":{\"type\":\"int\",\"logicalT" + + "ype\":\"date\"}}]}"); + private System.DateTime _x; + public virtual Schema Schema + { + get + { + return RecordWithOptionalLogicalType._SCHEMA; + } + } + public System.DateTime x + { + get + { + return this._x; + } + set + { + this._x = value; + } + } + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.x; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + }; + } + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: this.x = (System.DateTime)fieldValue; break; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + }; + } + } +} diff --git a/lang/csharp/src/apache/test/Specific/SpecificTests.cs b/lang/csharp/src/apache/test/Specific/SpecificTests.cs index 168c4d0bba5..1aa3c3a03ae 100644 --- a/lang/csharp/src/apache/test/Specific/SpecificTests.cs +++ b/lang/csharp/src/apache/test/Specific/SpecificTests.cs @@ -16,6 +16,7 @@ * limitations under the License. */ +using System; using System.Collections; using System.IO; using NUnit.Framework; @@ -23,8 +24,16 @@ using Avro.Specific; using Avro.Test.Specific; using System.Collections.Generic; +using Avro.Generic; +using Avro.Test.Generic; using Avro.Test.Specific.@return; +#if !NETCOREAPP +using System.CodeDom; +using System.CodeDom.Compiler; +using System.Reflection; +#endif + namespace Avro.Test { [TestFixture] @@ -263,6 +272,38 @@ public void TestEnumDefault() Assert.AreEqual(EnumType.DEFAULT, rec2.enumType); } + [TestCase(0L)] + [TestCase(100L)] + [TestCase(-100L)] + [TestCase(0.0)] + [TestCase(100.0)] + [TestCase(-100.0)] + public void TestDoubleLongUnion(object value) + { + var testRecord = new DoubleLongUnionRecord(); + testRecord.Property = value; + + // serialize + var stream = serialize(DoubleLongUnionRecord._SCHEMA, testRecord); + + // deserialize + var rec2 = deserialize(stream, DoubleLongUnionRecord._SCHEMA, DoubleLongUnionRecord._SCHEMA); + Assert.AreEqual(value, rec2.Property); + Assert.AreEqual(value.GetType(), rec2.Property.GetType()); + } + + [TestCase(0)] + [TestCase(100)] + [TestCase(-100)] + [TestCase(0.0f)] + [TestCase(100.0f)] + [TestCase(-100.0f)] + [TestCase("0")] + [TestCase("100")] + public void TestDoubleLongUnionNoMatchException(object value) + { + Assert.Throws(() => serialize(DoubleLongUnionRecord._SCHEMA, new DoubleLongUnionRecord() { Property = value })); + } [Test] public void TestArrayWithReservedWords() @@ -433,6 +474,43 @@ public void TestEmbeddedGenerics() Assert.AreEqual(0, dstRecord.UserMatrix[2].Count); } + private static void serializeGeneric(string writerSchema, T actual, out Stream stream, out Schema ws) + { + var ms = new MemoryStream(); + Encoder e = new BinaryEncoder(ms); + ws = Schema.Parse(writerSchema); + GenericWriter w = new GenericWriter(ws); + w.Write(actual, e); + ms.Flush(); + ms.Position = 0; + stream = ms; + } + + [Test] + public void DeserializeToLogicalTypeWithDefault() + { + var writerSchemaString = @"{ + ""type"": ""record"", + ""name"": ""RecordWithOptionalLogicalType"", + ""namespace"": ""Avro.Test.Specific.return"", + ""fields"": [ + ]}"; + + var writerSchema = Schema.Parse(writerSchemaString); + + Stream stream; + + serializeGeneric(writerSchemaString, + GenericTests.MkRecord(new object[] { }, (RecordSchema)writerSchema), + out stream, + out _); + + RecordWithOptionalLogicalType output = deserialize(stream, writerSchema, RecordWithOptionalLogicalType._SCHEMA); + + Assert.AreEqual(output.x, new DateTime(1970, 1, 11)); + + } + private static S deserialize(Stream ms, Schema ws, Schema rs) where S : class, ISpecificRecord { long initialPos = ms.Position; @@ -526,7 +604,7 @@ private static void AssertSpecificRecordEqual(ISpecificRecord rec1, ISpecificRec } /// - /// Asserts that two lists are equal, delegating the work of comapring + /// Asserts that two lists are equal, delegating the work of comparing /// entries to /// . /// diff --git a/lang/csharp/src/apache/test/Util/LogicalTypeTests.cs b/lang/csharp/src/apache/test/Util/LogicalTypeTests.cs index e4d6b052af9..0129b2a5b45 100644 --- a/lang/csharp/src/apache/test/Util/LogicalTypeTests.cs +++ b/lang/csharp/src/apache/test/Util/LogicalTypeTests.cs @@ -18,6 +18,7 @@ using System; using System.Globalization; +using System.Numerics; using Avro.Util; using NUnit.Framework; @@ -26,28 +27,95 @@ namespace Avro.Test [TestFixture] class LogicalTypeTests { - [TestCase("1234.56")] - [TestCase("-1234.56")] - [TestCase("123456789123456789.56")] - [TestCase("-123456789123456789.56")] - [TestCase("000000000000000001.01")] - [TestCase("-000000000000000001.01")] - public void TestDecimal(string s) + [TestCase("0", 0, new byte[] { 0 })] + [TestCase("1.01", 2, new byte[] { 101 })] + [TestCase("123456789123456789.56", 2, new byte[] { 0, 171, 84, 169, 143, 129, 101, 36, 108 })] + [TestCase("1234", 0, new byte[] { 4, 210 })] + [TestCase("1234.5", 1, new byte[] { 48, 57 })] + [TestCase("1234.56", 2, new byte[] { 1, 226, 64 })] + [TestCase("-0", 0, new byte[] { 0 })] + [TestCase("-1.01", 2, new byte[] { 155 })] + [TestCase("-123456789123456789.56", 2, new byte[] { 255, 84, 171, 86, 112, 126, 154, 219, 148 })] + [TestCase("-1234", 0, new byte[] { 251, 46 })] + [TestCase("-1234.5", 1, new byte[] { 207, 199 })] + [TestCase("-1234.56", 2, new byte[] { 254, 29, 192 })] + // This tests ensures that changes to Decimal.ConvertToBaseValue and ConvertToLogicalValue can be validated (bytes) + public void TestDecimalConvert(string s, int scale, byte[] converted) { - var schema = (LogicalSchema)Schema.Parse("{\"type\": \"bytes\", \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 2 }"); + var schema = (LogicalSchema)Schema.Parse(@$"{{""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": {scale}}}"); var avroDecimal = new Avro.Util.Decimal(); - var decimalVal = (AvroDecimal)decimal.Parse(s); + // CultureInfo.InvariantCulture ensures that "." is always accepted as the decimal point + var decimalVal = (AvroDecimal)decimal.Parse(s, CultureInfo.InvariantCulture); + + // TestDecimal tests ConvertToLogicalValue(ConvertToBaseValue(...)) which might hide symmetrical breaking changes in both functions + // The following 2 tests are checking the conversions separately + + // Validate Decimal.ConvertToBaseValue + Assert.AreEqual(converted, avroDecimal.ConvertToBaseValue(decimalVal, schema)); + + // Validate Decimal.ConvertToLogicalValue + Assert.AreEqual(decimalVal, (AvroDecimal)avroDecimal.ConvertToLogicalValue(converted, schema)); + } + + [Test] + public void TestDecimal( + [Values( + "1234.56", + "-1234.56", + "123456789123456789.56", + "-123456789123456789.56", + "000000000000000001.01", + "-000000000000000001.01" + )] string s, + [Values( + "\"bytes\"", + "{\"type\": \"fixed\", \"size\": 16, \"name\": \"n\"}" + )] string baseType) + { + var schema = (LogicalSchema)Schema.Parse($"{{\"type\": {baseType}, \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 2 }}"); + + var avroDecimal = new Avro.Util.Decimal(); + // CultureInfo.InvariantCulture ensures that "." is always accepted as the decimal point + var decimalVal = (AvroDecimal)decimal.Parse(s, CultureInfo.InvariantCulture); var convertedDecimalVal = (AvroDecimal)avroDecimal.ConvertToLogicalValue(avroDecimal.ConvertToBaseValue(decimalVal, schema), schema); Assert.AreEqual(decimalVal, convertedDecimalVal); } - [TestCase] - public void TestDecimalMinMax() + [Test] + public void TestDecimalScale( + [Values( + "0", + "1", + "-1", + "1234567891234567890123456789", + "-1234567891234567890123456789", + "0000000000000000000000000001", + "-0000000000000000000000000001" + )] string s, + [Values(1, 2, 3, 4, 5, 6, 7, 8)] int scale, + [Values( + "\"bytes\"", + "{\"type\": \"fixed\", \"size\": 16, \"name\": \"n\"}" + )] string baseType) { - var schema = (LogicalSchema)Schema.Parse("{\"type\": \"bytes\", \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 0 }"); + var schema = (LogicalSchema)Schema.Parse($"{{\"type\": {baseType}, \"logicalType\": \"decimal\", \"precision\": 8, \"scale\": {scale} }}"); + + var avroDecimal = new Avro.Util.Decimal(); + var decimalVal = new AvroDecimal(BigInteger.Parse(s), scale); + + var convertedDecimalVal = (AvroDecimal)avroDecimal.ConvertToLogicalValue(avroDecimal.ConvertToBaseValue(decimalVal, schema), schema); + + Assert.AreEqual(decimalVal, convertedDecimalVal); + } + + [TestCase("\"bytes\"")] + [TestCase("{\"type\": \"fixed\", \"size\": 16, \"name\": \"n\"}")] + public void TestDecimalMinMax(string baseType) + { + var schema = (LogicalSchema)Schema.Parse($"{{\"type\": {baseType}, \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 0 }}"); var avroDecimal = new Avro.Util.Decimal(); @@ -59,10 +127,11 @@ public void TestDecimalMinMax() } } - [TestCase] - public void TestDecimalOutOfRangeException() + [TestCase("\"bytes\"")] + [TestCase("{\"type\": \"fixed\", \"size\": 16, \"name\": \"n\"}")] + public void TestDecimalOutOfRangeException(string baseType) { - var schema = (LogicalSchema)Schema.Parse("{\"type\": \"bytes\", \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 2 }"); + var schema = (LogicalSchema)Schema.Parse($"{{\"type\": {baseType}, \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 2 }}"); var avroDecimal = new Avro.Util.Decimal(); var decimalVal = (AvroDecimal)1234.567M; // scale of 3 should throw ArgumentOutOfRangeException @@ -75,6 +144,14 @@ public void TestDecimalOutOfRangeException() [TestCase("05/05/2019 00:00:00Z")] [TestCase("05/05/2019 01:00:00Z")] [TestCase("05/05/2019 01:00:00+01:00")] + [TestCase("05/05/2019 01:00:00.1Z")] + [TestCase("05/05/2019 01:00:00.01Z")] + [TestCase("05/05/2019 01:00:00.001Z")] + [TestCase("05/05/2019 01:00:00.0001Z")] + [TestCase("05/05/2019 01:00:00.00001Z")] + [TestCase("05/05/2019 01:00:00.000001Z")] + [TestCase("05/05/2019 01:00:00.0000001Z")] + [TestCase("05/05/2019 01:00:00.00000001Z")] public void TestDate(string s) { var schema = (LogicalSchema)Schema.Parse("{\"type\": \"int\", \"logicalType\": \"date\"}"); @@ -100,6 +177,12 @@ public void TestDate(string s) [TestCase("05/05/2019 14:20:00+01:00", "05/05/2019 13:20:00Z")] [TestCase("05/05/2019 00:00:00Z", "05/05/2019 00:00:00Z")] [TestCase("05/05/2019 00:00:00+01:00", "05/04/2019 23:00:00Z")] // adjusted to UTC + [TestCase("01/01/2019 14:20:00.1Z", "01/01/2019 14:20:00.1Z")] + [TestCase("01/01/2019 14:20:00.01Z", "01/01/2019 14:20:00.01Z")] + [TestCase("01/01/2019 14:20:00.001Z", "01/01/2019 14:20:00.001Z")] + [TestCase("01/01/2019 14:20:00.0001Z", "01/01/2019 14:20:00Z")] + [TestCase("01/01/2019 14:20:00.0009Z", "01/01/2019 14:20:00Z")] // there is no rounding up + [TestCase("01/01/2019 14:20:00.0019Z", "01/01/2019 14:20:00.001Z")] // there is no rounding up public void TestTimestampMillisecond(string s, string e) { var schema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"timestamp-millis\"}"); @@ -116,6 +199,7 @@ public void TestTimestampMillisecond(string s, string e) var avroTimestampMilli = new TimestampMillisecond(); var convertedDate = (DateTime)avroTimestampMilli.ConvertToLogicalValue(avroTimestampMilli.ConvertToBaseValue(date, schema), schema); Assert.AreEqual(expectedDate, convertedDate); + Assert.AreEqual(DateTimeKind.Utc, convertedDate.Kind); } [TestCase("01/01/2019 14:20:00Z", "01/01/2019 14:20:00Z")] @@ -124,6 +208,15 @@ public void TestTimestampMillisecond(string s, string e) [TestCase("05/05/2019 14:20:00+01:00", "05/05/2019 13:20:00Z")] [TestCase("05/05/2019 00:00:00Z", "05/05/2019 00:00:00Z")] [TestCase("05/05/2019 00:00:00+01:00", "05/04/2019 23:00:00Z")] // adjusted to UTC + [TestCase("01/01/2019 14:20:00.1Z", "01/01/2019 14:20:00.1Z")] + [TestCase("01/01/2019 14:20:00.01Z", "01/01/2019 14:20:00.01Z")] + [TestCase("01/01/2019 14:20:00.001Z", "01/01/2019 14:20:00.001Z")] + [TestCase("01/01/2019 14:20:00.0001Z", "01/01/2019 14:20:00.0001Z")] + [TestCase("01/01/2019 14:20:00.00001Z", "01/01/2019 14:20:00.00001Z")] + [TestCase("01/01/2019 14:20:00.000001Z", "01/01/2019 14:20:00.000001Z")] + [TestCase("01/01/2019 14:20:00.0000001Z", "01/01/2019 14:20:00Z")] + [TestCase("01/01/2019 14:20:00.0000009Z", "01/01/2019 14:20:00Z")] // there is no rounding up + [TestCase("01/01/2019 14:20:00.0000019Z", "01/01/2019 14:20:00.000001Z")] // there is no rounding up public void TestTimestampMicrosecond(string s, string e) { var schema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"timestamp-micros\"}"); @@ -140,20 +233,117 @@ public void TestTimestampMicrosecond(string s, string e) var avroTimestampMicro = new TimestampMicrosecond(); var convertedDate = (DateTime)avroTimestampMicro.ConvertToLogicalValue(avroTimestampMicro.ConvertToBaseValue(date, schema), schema); Assert.AreEqual(expectedDate, convertedDate); + Assert.AreEqual(DateTimeKind.Utc, convertedDate.Kind); + } + + [TestCase("01/01/2019 14:20:00", "01/01/2019 14:20:00")] + [TestCase("05/05/2019 14:20:00", "05/05/2019 14:20:00")] + [TestCase("05/05/2019 00:00:00", "05/05/2019 00:00:00")] + [TestCase("01/01/2019 14:20:00.1", "01/01/2019 14:20:00.1")] + [TestCase("01/01/2019 14:20:00.01", "01/01/2019 14:20:00.01")] + [TestCase("01/01/2019 14:20:00.001", "01/01/2019 14:20:00.001")] + [TestCase("01/01/2019 14:20:00.0001", "01/01/2019 14:20:00")] + [TestCase("01/01/2019 14:20:00.0009", "01/01/2019 14:20:00")] // there is no rounding up + [TestCase("01/01/2019 14:20:00.0019", "01/01/2019 14:20:00.001")] // there is no rounding up + [TestCase("01/01/2019 14:20:00Z", "01/01/2019 14:20:00Z")] // UTC timestamps, but will check will in local TZ + [TestCase("01/01/2019 14:20:00.1Z", "01/01/2019 14:20:00.1Z")] + [TestCase("01/01/2019 14:20:00.01Z", "01/01/2019 14:20:00.01Z")] + [TestCase("01/01/2019 14:20:00.001Z", "01/01/2019 14:20:00.001Z")] + public void TestLocalTimestampMillisecond(string s, string e) + { + var schema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"local-timestamp-millis\"}"); + + var date = DateTime.Parse(s, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.RoundtripKind); + + if (date.Kind != DateTimeKind.Utc) + { + date = DateTime.Parse(s, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.AssumeLocal); + } + + var expectedDate = DateTime.Parse(e, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.RoundtripKind); + + if (expectedDate.Kind != DateTimeKind.Utc) + { + expectedDate = DateTime.Parse(e, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.AssumeLocal); + } + + expectedDate = expectedDate.ToLocalTime(); + + var avroLocalTimestampMilli = new LocalTimestampMillisecond(); + var convertedDate = (DateTime)avroLocalTimestampMilli.ConvertToLogicalValue(avroLocalTimestampMilli.ConvertToBaseValue(date, schema), schema); + Assert.AreEqual(expectedDate, convertedDate); + Assert.AreEqual(DateTimeKind.Local, convertedDate.Kind); + } + + [TestCase("01/01/2019 14:20:00", "01/01/2019 14:20:00")] + [TestCase("05/05/2019 14:20:00", "05/05/2019 14:20:00")] + [TestCase("05/05/2019 00:00:00", "05/05/2019 00:00:00")] + [TestCase("01/01/2019 14:20:00.1", "01/01/2019 14:20:00.1")] + [TestCase("01/01/2019 14:20:00.01", "01/01/2019 14:20:00.01")] + [TestCase("01/01/2019 14:20:00.001", "01/01/2019 14:20:00.001")] + [TestCase("01/01/2019 14:20:00.0001", "01/01/2019 14:20:00.0001")] + [TestCase("01/01/2019 14:20:00.00001", "01/01/2019 14:20:00.00001")] + [TestCase("01/01/2019 14:20:00.000001", "01/01/2019 14:20:00.000001")] + [TestCase("01/01/2019 14:20:00.0000001", "01/01/2019 14:20:00")] + [TestCase("01/01/2019 14:20:00.0000009", "01/01/2019 14:20:00")] // there is no rounding up + [TestCase("01/01/2019 14:20:00.0000019", "01/01/2019 14:20:00.000001")] // there is no rounding up + [TestCase("01/01/2019 14:20:00Z", "01/01/2019 14:20:00Z")] // UTC timestamps, but will check will in local TZ + [TestCase("01/01/2019 14:20:00.1Z", "01/01/2019 14:20:00.1Z")] + [TestCase("01/01/2019 14:20:00.01Z", "01/01/2019 14:20:00.01Z")] + [TestCase("01/01/2019 14:20:00.001Z", "01/01/2019 14:20:00.001Z")] + [TestCase("01/01/2019 14:20:00.0001Z", "01/01/2019 14:20:00.0001Z")] + [TestCase("01/01/2019 14:20:00.00001Z", "01/01/2019 14:20:00.00001Z")] + [TestCase("01/01/2019 14:20:00.000001Z", "01/01/2019 14:20:00.000001Z")] + public void TestLocalTimestampMicrosecond(string s, string e) + { + var schema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"local-timestamp-micros\"}"); + + var date = DateTime.Parse(s, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.RoundtripKind); + + if (date.Kind != DateTimeKind.Utc) + { + date = DateTime.Parse(s, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.AssumeLocal); + } + + var expectedDate = DateTime.Parse(e, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.RoundtripKind); + + if (expectedDate.Kind != DateTimeKind.Utc) + { + expectedDate = DateTime.Parse(e, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.AssumeLocal); + } + + expectedDate = expectedDate.ToLocalTime(); + + var avroLocalTimestampMicro = new LocalTimestampMicrosecond(); + var convertedDate = (DateTime)avroLocalTimestampMicro.ConvertToLogicalValue(avroLocalTimestampMicro.ConvertToBaseValue(date, schema), schema); + Assert.AreEqual(expectedDate, convertedDate); + Assert.AreEqual(DateTimeKind.Local, convertedDate.Kind); } [TestCase("01:20:10", "01:20:10", false)] [TestCase("23:00:00", "23:00:00", false)] + [TestCase("23:59:00", "23:59:00", false)] + [TestCase("23:59:59", "23:59:59", false)] + [TestCase("01:20:10.1", "01:20:10.1", false)] + [TestCase("01:20:10.01", "01:20:10.01", false)] + [TestCase("01:20:10.001", "01:20:10.001", false)] + [TestCase("01:20:10.0001", "01:20:10", false)] + [TestCase("01:20:10.0009", "01:20:10", false)] // there is no rounding up + [TestCase("01:20:10.0019", "01:20:10.001", false)] // there is no rounding up + [TestCase("23:59:59.999", "23:59:59.999", false)] [TestCase("01:00:00:00", null, true)] - public void TestTime(string s, string e, bool expectRangeError) + [TestCase("-00:00:00.001", null, true)] + [TestCase("-00:00:00.000001", null, true)] + [TestCase("-00:00:00.0000001", null, true)] + [TestCase("-00:01", null, true)] + [TestCase("-999999.00:00:00", null, true)] + public void TestTimeMillisecond(string s, string e, bool expectRangeError) { var timeMilliSchema = (LogicalSchema)Schema.Parse("{\"type\": \"int\", \"logicalType\": \"time-millis\"}"); - var timeMicroSchema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"time-micros\"}"); var time = TimeSpan.Parse(s); - + var avroTimeMilli = new TimeMillisecond(); - var avroTimeMicro = new TimeMicrosecond(); if (expectRangeError) { @@ -161,10 +351,6 @@ public void TestTime(string s, string e, bool expectRangeError) { avroTimeMilli.ConvertToLogicalValue(avroTimeMilli.ConvertToBaseValue(time, timeMilliSchema), timeMilliSchema); }); - Assert.Throws(() => - { - avroTimeMicro.ConvertToLogicalValue(avroTimeMilli.ConvertToBaseValue(time, timeMicroSchema), timeMicroSchema); - }); } else { @@ -172,8 +358,48 @@ public void TestTime(string s, string e, bool expectRangeError) var convertedTime = (TimeSpan)avroTimeMilli.ConvertToLogicalValue(avroTimeMilli.ConvertToBaseValue(time, timeMilliSchema), timeMilliSchema); Assert.AreEqual(expectedTime, convertedTime); + } + } + + [TestCase("01:20:10", "01:20:10", false)] + [TestCase("23:00:00", "23:00:00", false)] + [TestCase("23:59:00", "23:59:00", false)] + [TestCase("23:59:59", "23:59:59", false)] + [TestCase("01:20:10.1", "01:20:10.1", false)] + [TestCase("01:20:10.01", "01:20:10.01", false)] + [TestCase("01:20:10.001", "01:20:10.001", false)] + [TestCase("01:20:10.0001", "01:20:10.0001", false)] + [TestCase("01:20:10.00001", "01:20:10.00001", false)] + [TestCase("01:20:10.000001", "01:20:10.000001", false)] + [TestCase("01:20:10.0000001", "01:20:10", false)] + [TestCase("01:20:10.0000009", "01:20:10", false)] + [TestCase("23:59:59.999999", "23:59:59.999999", false)] + [TestCase("01:00:00:00", null, true)] + [TestCase("-00:00:00.001", null, true)] + [TestCase("-00:00:00.000001", null, true)] + [TestCase("-00:00:00.0000001", null, true)] + [TestCase("-00:01", null, true)] + [TestCase("-999999.00:00:00", null, true)] + public void TestTimeMicrosecond(string s, string e, bool expectRangeError) + { + var timeMicroSchema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"time-micros\"}"); + + var time = TimeSpan.Parse(s); + + var avroTimeMicro = new TimeMicrosecond(); + + if (expectRangeError) + { + Assert.Throws(() => + { + avroTimeMicro.ConvertToLogicalValue(avroTimeMicro.ConvertToBaseValue(time, timeMicroSchema), timeMicroSchema); + }); + } + else + { + var expectedTime = TimeSpan.Parse(e); - convertedTime = (TimeSpan)avroTimeMicro.ConvertToLogicalValue(avroTimeMicro.ConvertToBaseValue(time, timeMicroSchema), timeMicroSchema); + var convertedTime = (TimeSpan)avroTimeMicro.ConvertToLogicalValue(avroTimeMicro.ConvertToBaseValue(time, timeMicroSchema), timeMicroSchema); Assert.AreEqual(expectedTime, convertedTime); } diff --git a/lang/csharp/src/apache/test/Util/UnknownLogicalTypeTests.cs b/lang/csharp/src/apache/test/Util/UnknownLogicalTypeTests.cs new file mode 100644 index 00000000000..43cc2f3a462 --- /dev/null +++ b/lang/csharp/src/apache/test/Util/UnknownLogicalTypeTests.cs @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Data.SqlTypes; +using System.Globalization; +using System.Net.Sockets; +using System.Numerics; +using Avro.Util; +using NUnit.Framework; +using NUnit.Framework.Constraints; + +namespace Avro.test.Util +{ + /// + /// This tests added to confirm standalone operation of new type UnknownLogicalType that implements LogicalType + /// + [TestFixture] + class UnknownLogicalTypeTests + { + [TestCase(typeof(System.String), "", "{\"type\": \"string\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Boolean), true, "{\"type\": \"boolean\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int32), Int32.MinValue, "{\"type\": \"int\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int64), Int64.MinValue, "{\"type\": \"long\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Single), Single.MinValue, "{\"type\": \"float\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Double), Double.MinValue, "{\"type\": \"double\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Byte[]), new byte[] { }, "{\"type\": \"bytes\", \"logicalType\": \"unknown\"}")] + public void TestConvertToBaseValue_IsTrue(Type baseType, object logicalValue, string schemaText) + { + var schema = (LogicalSchema)Schema.Parse(schemaText); + + var logicalType = new UnknownLogicalType(schema); + + var baseValue = logicalType.ConvertToBaseValue(logicalValue, schema); + + Assert.AreEqual(baseValue, Convert.ChangeType(logicalValue, baseType)); + } + + [TestCase(typeof(System.Byte[]), "", "{\"type\": \"string\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Double), true, "{\"type\": \"boolean\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Single), Int32.MinValue, "{\"type\": \"int\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Boolean), Int64.MinValue, "{\"type\": \"long\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int32), Single.MinValue, "{\"type\": \"float\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int64), Double.MinValue, "{\"type\": \"double\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.String), new byte[] { }, "{\"type\": \"bytes\", \"logicalType\": \"unknown\"}")] + public void TestConvertToBaseValue_IsFalse(Type baseType, object logicalValue, string schemaText) + { + var schema = (LogicalSchema)Schema.Parse(schemaText); + + var logicalType = new UnknownLogicalType(schema); + + var baseValue = logicalType.ConvertToBaseValue(logicalValue, schema); + + Assert.AreNotEqual(baseValue.GetType(), baseType); + } + + [TestCase(typeof(System.String), "", "{\"type\": \"string\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Boolean), true, "{\"type\": \"boolean\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int32), Int32.MinValue, "{\"type\": \"int\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int64), Int64.MinValue, "{\"type\": \"long\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Single), Single.MinValue, "{\"type\": \"float\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Double), Double.MinValue, "{\"type\": \"double\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Byte[]), new byte[] { }, "{\"type\": \"bytes\", \"logicalType\": \"unknown\"}")] + public void TestConvertToLogicalValue_IsTrue(Type baseType, object logicalValue, string schemaText) + { + var schema = (LogicalSchema)Schema.Parse(schemaText); + + var logicalType = new UnknownLogicalType(schema); + + var baseValue = logicalType.ConvertToLogicalValue(logicalValue, schema); + + Assert.AreEqual(baseValue, Convert.ChangeType(logicalValue, baseType)); + } + + [TestCase(typeof(System.Byte[]), "", "{\"type\": \"string\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Double), true, "{\"type\": \"boolean\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Single), Int32.MinValue, "{\"type\": \"int\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Boolean), Int64.MinValue, "{\"type\": \"long\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int32), Single.MinValue, "{\"type\": \"float\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int64), Double.MinValue, "{\"type\": \"double\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.String), new byte[] { }, "{\"type\": \"bytes\", \"logicalType\": \"unknown\"}")] + public void TestConvertToLogicalValue_IsFalse(Type baseType, object logicalValue, string schemaText) + { + var schema = (LogicalSchema)Schema.Parse(schemaText); + + var logicalType = new UnknownLogicalType(schema); + + var baseValue = logicalType.ConvertToLogicalValue(logicalValue, schema); + + Assert.AreNotEqual(baseValue.GetType(), baseType); + } + + [TestCase(typeof(System.String), false, "{\"type\": \"string\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Boolean), false, "{\"type\": \"boolean\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int32), false, "{\"type\": \"int\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int64), false, "{\"type\": \"long\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Single), false, "{\"type\": \"float\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Double), false, "{\"type\": \"double\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Byte[]), false, "{\"type\": \"bytes\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.String), true, "{\"type\": \"string\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Boolean?), true, "{\"type\": \"boolean\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int32?), true, "{\"type\": \"int\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int64?), true, "{\"type\": \"long\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Single?), true, "{\"type\": \"float\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Double?), true, "{\"type\": \"double\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Byte[]), true, "{\"type\": \"bytes\", \"logicalType\": \"unknown\"}")] + public void TestGetCSharpType_IsTrue(Type type, bool isNullable, string schemaText) + { + var schema = (LogicalSchema)Schema.Parse(schemaText); + + var logicalType = new UnknownLogicalType(schema); + + Assert.AreEqual(logicalType.GetCSharpType(isNullable), type); + } + + //[TestCase(typeof(System.String), true, "{\"type\": \"string\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Boolean), true, "{\"type\": \"boolean\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int32), true, "{\"type\": \"int\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int64), true, "{\"type\": \"long\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Single), true, "{\"type\": \"float\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Double), true, "{\"type\": \"double\", \"logicalType\": \"unknown\"}")] + //[TestCase(typeof(System.Byte[]), true, "{\"type\": \"bytes\", \"logicalType\": \"unknown\"}")] + //[TestCase(typeof(System.String), false, "{\"type\": \"string\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Boolean?), false, "{\"type\": \"boolean\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int32?), false, "{\"type\": \"int\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Int64?), false, "{\"type\": \"long\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Single?), false, "{\"type\": \"float\", \"logicalType\": \"unknown\"}")] + [TestCase(typeof(System.Double?), false, "{\"type\": \"double\", \"logicalType\": \"unknown\"}")] + //[TestCase(typeof(System.Byte?[]), false, "{\"type\": \"bytes\", \"logicalType\": \"unknown\"}")] + public void TestGetCSharpType_IsFalse(Type type, bool isNullable, string schemaText) + { + var schema = (LogicalSchema)Schema.Parse(schemaText); + + var logicalType = new UnknownLogicalType(schema); + + Assert.AreNotEqual(logicalType.GetCSharpType(isNullable), type); + } + + [TestCase("", "{\"type\": \"string\", \"logicalType\": \"unknown\"}")] + [TestCase(true, "{\"type\": \"boolean\", \"logicalType\": \"unknown\"}")] + [TestCase(Int32.MinValue, "{\"type\": \"int\", \"logicalType\": \"unknown\"}")] + [TestCase(Int64.MinValue, "{\"type\": \"long\", \"logicalType\": \"unknown\"}")] + [TestCase(Single.MinValue, "{\"type\": \"float\", \"logicalType\": \"unknown\"}")] + [TestCase(Double.MinValue, "{\"type\": \"double\", \"logicalType\": \"unknown\"}")] + [TestCase(new byte[] { } , "{\"type\": \"bytes\", \"logicalType\": \"unknown\"}")] + public void TestIsInstanceOfLogicalType_IsTrue(object logicalValue, string schemaText) + { + var schema = (LogicalSchema)Schema.Parse(schemaText); + + var logicalType = new UnknownLogicalType(schema); + + Assert.IsTrue(logicalType.IsInstanceOfLogicalType(logicalValue)); + } + + [TestCase(Int32.MinValue, "{\"type\": \"string\", \"logicalType\": \"unknown\"}")] + [TestCase(new byte[] { }, "{\"type\": \"boolean\", \"logicalType\": \"unknown\"}")] + [TestCase(Int64.MinValue, "{\"type\": \"int\", \"logicalType\": \"unknown\"}")] + [TestCase(Single.MinValue, "{\"type\": \"long\", \"logicalType\": \"unknown\"}")] + [TestCase(Double.MinValue, "{\"type\": \"float\", \"logicalType\": \"unknown\"}")] + [TestCase(new byte[] { }, "{\"type\": \"double\", \"logicalType\": \"unknown\"}")] + [TestCase("", "{\"type\": \"bytes\", \"logicalType\": \"unknown\"}")] + public void TestIsInstanceOfLogicalType_IsFalse(object logicalValue, string schemaText) + { + var schema = (LogicalSchema)Schema.Parse(schemaText); + + var logicalType = new UnknownLogicalType(schema); + + Assert.IsFalse(logicalType.IsInstanceOfLogicalType(logicalValue)); + } + + // See also a new test in Avro.Tests.File in TestSpecificDataSource using unknowLogicalTypeSchema + } +} diff --git a/lang/csharp/src/apache/test/Utils/VersionTests.cs b/lang/csharp/src/apache/test/Utils/VersionTests.cs new file mode 100644 index 00000000000..20d7ed2f421 --- /dev/null +++ b/lang/csharp/src/apache/test/Utils/VersionTests.cs @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Reflection; +using NUnit.Framework; + +namespace Avro.Test.Utils +{ + public class VersionTests + { + // SemVer2.0 Regular Expression + public static string SemVerRegex = @"^((([0-9]+)\.([0-9]+)\.([0-9]+)(?:-([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)$"; + + [Test] + public void VersionTest() + { + // Avro library's assembly + Assembly assembly = typeof(Schema).Assembly; + + // Note: InformationalVersion contains prerelease tag if available (e.g. 1.x.y-beta.z) + string libraryVersion = assembly.GetCustomAttribute().InformationalVersion; + + // Check version is SmeVer 2.0 compliant + Assert.That(libraryVersion, Does.Match(SemVerRegex)); + } + + [Test] + public void MandatoryAttributesTest() + { + // Avro library's assembly + Assembly assembly = typeof(Schema).Assembly; + + Assert.That(assembly.GetCustomAttribute(), Is.Not.Null); + Assert.That(assembly.GetCustomAttribute(), Is.Not.Null); + Assert.That(assembly.GetCustomAttribute(), Is.Not.Null); + Assert.That(assembly.GetCustomAttribute(), Is.Not.Null); + Assert.That(assembly.GetCustomAttribute(), Is.Not.Null); + } + } +} diff --git a/lang/csharp/stylecop.json b/lang/csharp/stylecop.json deleted file mode 100644 index 892559168ac..00000000000 --- a/lang/csharp/stylecop.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "$schema": "https://raw.githubusercontent.com/DotNetAnalyzers/StyleCopAnalyzers/master/StyleCop.Analyzers/StyleCop.Analyzers/Settings/stylecop.schema.json", - "settings": { - "documentationRules": { - "companyName": "Apache Software Foundation (ASF)", - "fileNamingConvention": "stylecop", - "documentInterfaces": false, - "documentExposedElements": true, - "documentInternalElements": false, - "documentPrivateElements": false, - "documentPrivateFields": false - }, - "indentation": { - "indentationSize": 4, - "tabSize": 4, - "useTabs": false - }, - "layoutRules": { - "newlineAtEndOfFile": "require", - "allowConsecutiveUsings": true - }, - "orderingRules": { - "blankLinesBetweenUsingGroups": "allow", - "systemUsingDirectivesFirst": true, - "usingDirectivesPlacement": "outsideNamespace" - }, - "readabilityRules": { - "allowBuiltInTypeAliases": false - } - } -} diff --git a/lang/csharp/versions.props b/lang/csharp/versions.props index b6b9747fe2a..7b6c257e54d 100644 --- a/lang/csharp/versions.props +++ b/lang/csharp/versions.props @@ -23,42 +23,54 @@ - 12.0.3 - 5.0.0 + 13.0.1 + 10.0.7 4.3.0 4.7.0 4.7.0 - 4.5.1 + + + 1.4.2 + 1.3.1 + + 4.1.0 + 1.1.7 - 10.0.3 - 4.4.0 - 4.3.0 - 4.3.0 - 4.3.0 + + 13.0.4 - - 16.8.0 - 16.8.0 - 5.0.3 - 16.11.0 - 3.13.2 - 3.12.0 - 3.17.0 + + 0.15.8 + 10.0.0 + 10.0.0 + 18.4.0 + 18.4.0 + 4.7.0 + 4.7.0 + 4.7.0 + 10.0.101 + 17.13.0 + 3.14.0 + 3.22.0 + 5.2.0 1.1.118 diff --git a/lang/java/android/pom.xml b/lang/java/android/pom.xml new file mode 100644 index 00000000000..5a054dd536d --- /dev/null +++ b/lang/java/android/pom.xml @@ -0,0 +1,114 @@ + + + 4.0.0 + + + avro-parent + org.apache.avro + 1.13.0-SNAPSHOT + ../pom.xml + + + avro-android + + Apache Avro Android Compatibility + https://avro.apache.org + A subset of the Avro core utilities for the Android platform + jar + + + ${project.parent.parent.basedir} + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + + shade + + + true + + + org.apache.avro:* + + + + + org.apache.avro:avro + + org/apache/avro/** + + + org/apache/avro/avro/util/internal/ClassValueUtil** + org/apache/avro/avro/util/internal/ThreadLocalWithInitial** + org/apache/avro/reflect/** + + + + + + + + + + + + + org.apache.avro + avro + ${project.version} + + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + org.apache.commons + commons-compress + + + org.xerial.snappy + snappy-java + true + + + org.tukaani + xz + true + + + com.github.luben + zstd-jni + true + + + + diff --git a/lang/java/android/src/main/java/org/apache/avro/util/internal/ClassValueCache.java b/lang/java/android/src/main/java/org/apache/avro/util/internal/ClassValueCache.java new file mode 100644 index 00000000000..7728455268b --- /dev/null +++ b/lang/java/android/src/main/java/org/apache/avro/util/internal/ClassValueCache.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util.internal; + +import java.util.function.Function; + +/** + * The Android environment doesn't support {@link ClassValue}. This utility + * bypasses its use in Avro to always recalculate the value without caching. + *

+ * This may have a performance impact in Android. + * + * @param Return type of the ClassValue + */ +public class ClassValueCache implements Function, R> { + + private final Function, R> ifAbsent; + + /** + * @param ifAbsent The function that calculates the value to be used from the + * class instance. + */ + public ClassValueCache(Function, R> ifAbsent) { + this.ifAbsent = ifAbsent; + } + + @Override + public R apply(Class c) { + return ifAbsent.apply(c); + } +} diff --git a/lang/java/android/src/main/java/org/apache/avro/util/internal/ThreadLocalWithInitial.java b/lang/java/android/src/main/java/org/apache/avro/util/internal/ThreadLocalWithInitial.java new file mode 100644 index 00000000000..cdba5ec7bd8 --- /dev/null +++ b/lang/java/android/src/main/java/org/apache/avro/util/internal/ThreadLocalWithInitial.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util.internal; + +import java.util.function.Supplier; + +/** + * Wraps a {@link ThreadLocal#withInitial(Supplier)} so it can be overridden in + * an android environment, where this method is not available until API 26. + */ +public class ThreadLocalWithInitial { + + /** Delegate a ThreadLocal instance with the supplier. */ + @SuppressWarnings("AnonymousHasLambdaAlternative") + public static ThreadLocal of(Supplier supplier) { + return new ThreadLocal() { + @Override + protected T initialValue() { + return supplier.get(); + } + }; + } +} diff --git a/lang/java/android/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java b/lang/java/android/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java new file mode 100644 index 00000000000..5c889b70b53 --- /dev/null +++ b/lang/java/android/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util.internal; + +import org.junit.jupiter.api.Test; + +import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.MatcherAssert.assertThat; + +public class TestClassValueCache { + @Test + void basic() { + ClassValueCache cache = new ClassValueCache<>(Class::toString); + + String fromCache = cache.apply(String.class); + assertThat(fromCache, is("class java.lang.String")); + + // Unlike the core ClassValueUtil, this always creates a new instance + assertThat(cache.apply(String.class), not(sameInstance(fromCache))); + } + +} diff --git a/lang/java/archetypes/avro-service-archetype/pom.xml b/lang/java/archetypes/avro-service-archetype/pom.xml index 1b50363554b..83d59dd4470 100644 --- a/lang/java/archetypes/avro-service-archetype/pom.xml +++ b/lang/java/archetypes/avro-service-archetype/pom.xml @@ -23,7 +23,7 @@ avro-archetypes-parent org.apache.avro - 1.11.0-SNAPSHOT + 1.13.0-SNAPSHOT ../pom.xml diff --git a/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml b/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml index 362f5b69ce9..be2717556a5 100644 --- a/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml +++ b/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml @@ -33,13 +33,28 @@ Simple Avro Ordering Service + ${maven.compiler.source} + ${maven.compiler.release} + ${project.build.sourceEncoding} ${project.version} - ${jackson.version} - ${junit.version} + ${jackson-bom.version} + ${junit5.version} 1.2.3 ${slf4j.version} + + + + com.fasterxml.jackson + jackson-bom + \${jackson-bom.version} + pom + import + + + + org.apache.avro @@ -56,16 +71,6 @@ avro-ipc-netty \${avro.version} - - com.fasterxml.jackson.core - jackson-core - \${jackson.version} - - - com.fasterxml.jackson.core - jackson-databind - \${jackson.version} - org.slf4j slf4j-api @@ -77,9 +82,9 @@ \${logback.version} - junit - junit - \${junit.version} + org.junit.jupiter + junit-jupiter + \${junit5.version} test @@ -135,6 +140,10 @@ org.apache.maven.plugins maven-compiler-plugin + + \${maven.compiler.source} + \${maven.compiler.release} + diff --git a/lang/java/archetypes/avro-service-archetype/src/main/resources/archetype-resources/src/test/java/integration/SimpleOrderServiceIntegrationTest.java b/lang/java/archetypes/avro-service-archetype/src/main/resources/archetype-resources/src/test/java/integration/SimpleOrderServiceIntegrationTest.java index e5d6ad7e509..0796826e0a9 100644 --- a/lang/java/archetypes/avro-service-archetype/src/main/resources/archetype-resources/src/test/java/integration/SimpleOrderServiceIntegrationTest.java +++ b/lang/java/archetypes/avro-service-archetype/src/main/resources/archetype-resources/src/test/java/integration/SimpleOrderServiceIntegrationTest.java @@ -21,8 +21,8 @@ package ${package}.integration; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.net.InetSocketAddress; import java.util.ArrayList; @@ -30,9 +30,9 @@ import ${package}.transport.SimpleOrderServiceEndpoint; import ${package}.transport.SimpleOrderServiceClient; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import ${package}.service.Confirmation; import ${package}.service.Item; @@ -42,13 +42,13 @@ * SimpleOrderServiceIntegrationTest runs as part of the Integration phase of the build and is * meant for end to end service testing. */ -public class SimpleOrderServiceIntegrationTest { +class SimpleOrderServiceIntegrationTest { private static SimpleOrderServiceEndpoint service; private static SimpleOrderServiceClient client; @Test - public void simpleRoundTripTest() throws Exception { + void simpleRoundTripTest() throws Exception { Order simpleOrder = createOrder(); Confirmation c = client.submitOrder(simpleOrder); @@ -57,7 +57,7 @@ public void simpleRoundTripTest() throws Exception { assertTrue(c.getEstimatedCompletion() > 0); } - @BeforeClass + @BeforeAll public static void setupTransport() throws Exception { InetSocketAddress endpointAddress = new InetSocketAddress("0.0.0.0", 12345); service = new SimpleOrderServiceEndpoint(endpointAddress); @@ -67,7 +67,7 @@ public static void setupTransport() throws Exception { client.start(); } - @AfterClass + @AfterAll public static void shutdownTransport() throws Exception { client.stop(); service.stop(); diff --git a/lang/java/archetypes/avro-service-archetype/src/test/integration/projects/basic/archetype.properties b/lang/java/archetypes/avro-service-archetype/src/test/resources/projects/basic/archetype.properties similarity index 100% rename from lang/java/archetypes/avro-service-archetype/src/test/integration/projects/basic/archetype.properties rename to lang/java/archetypes/avro-service-archetype/src/test/resources/projects/basic/archetype.properties diff --git a/lang/java/archetypes/avro-service-archetype/src/test/integration/projects/basic/goal.txt b/lang/java/archetypes/avro-service-archetype/src/test/resources/projects/basic/goal.txt similarity index 100% rename from lang/java/archetypes/avro-service-archetype/src/test/integration/projects/basic/goal.txt rename to lang/java/archetypes/avro-service-archetype/src/test/resources/projects/basic/goal.txt diff --git a/lang/java/archetypes/pom.xml b/lang/java/archetypes/pom.xml index 27837400503..6d2d6641b72 100644 --- a/lang/java/archetypes/pom.xml +++ b/lang/java/archetypes/pom.xml @@ -22,7 +22,7 @@ org.apache.avro avro-parent - 1.11.0-SNAPSHOT + 1.13.0-SNAPSHOT ../pom.xml diff --git a/lang/java/avro/pom.xml b/lang/java/avro/pom.xml index bfa8154e11f..8cab8b75f5c 100644 --- a/lang/java/avro/pom.xml +++ b/lang/java/avro/pom.xml @@ -23,8 +23,8 @@ avro-parent org.apache.avro - 1.11.0-SNAPSHOT - ../ + 1.13.0-SNAPSHOT + ../pom.xml avro @@ -54,7 +54,24 @@ org/apache/avro/data/Json.avsc + + src/main/resources + + + + src/test/resources + + + ../../../share/ + + schemas/** + test/** + + share/ + + + org.apache.maven.plugins @@ -67,6 +84,7 @@ + org.apache.maven.plugins maven-surefire-plugin @@ -87,135 +105,107 @@ - test-with-fast-reader + test-without-fast-reader test test - true + false + + + + org.apache.maven.plugins + maven-invoker-plugin + + true + + true + ./src/it + ${project.build.directory}/it + + pom.xml + + verify + ${project.build.directory}/local-repo + ./src/it/settings.xml + + + true + + + clean + test + + + + + + Populate the local repo for integration tests + + install + + + + + Run all tests under Java 11 + + run + + + + [11,12) + + + 11 + + ${project.build.directory}/it-jdk-11 + + + + + Run all tests under Java 17 + + run + + + + [17,18) + + + 17 + + ${project.build.directory}/it-jdk-17 + + + + + Run all tests under Java 21 + + run + + + + [21,22) + + + 21 + + ${project.build.directory}/it-jdk-21 + + + + + - - - interop-data-generate - - false - - - - - org.codehaus.mojo - exec-maven-plugin - ${exec-plugin.version} - - - - interop-generate-null-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java.avro - 100 - - - java - - - - - interop-generate-deflate-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java_deflate.avro - 100 - deflate - - - java - - - - - interop-generate-snappy-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java_snappy.avro - 100 - snappy - - - java - - - - - interop-generate-bzip2-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java_bzip2.avro - 100 - bzip2 - - - java - - - - - interop-generate-xz-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java_xz.avro - 100 - xz - - - java - - - - - interop-generate-zstandard-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java_zstandard.avro - 100 - zstandard - - - java - - - - - - - @@ -250,5 +240,10 @@ hamcrest-library test + + org.mockito + mockito-core + test + diff --git a/lang/java/avro/src/it/pom.xml b/lang/java/avro/src/it/pom.xml new file mode 100644 index 00000000000..bd9bc523d7a --- /dev/null +++ b/lang/java/avro/src/it/pom.xml @@ -0,0 +1,179 @@ + + + + 4.0.0 + + nl.example.avro + integration-test + 1.0.0 + + jar + IT : Java @integrationTestingJDK@ + + + UTF-8 + UTF-8 + 8 + + + + + + ../../src/test/resources + + + ../../../../../share/ + + schemas/** + test/** + + share/ + + + + + + org.apache.maven.plugins + maven-toolchains-plugin + @maven-toolchains-plugin.version@ + + + + select-jdk-toolchain + + + + + @integrationTestingJDK@ + + + + + org.apache.maven.plugins + maven-compiler-plugin + @maven-compiler-plugin.version@ + + + default-compile + + true + + + + default-testCompile + + ../../src/test/java + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + @maven-surefire-plugin.version@ + + false + true + + + + + java.net.URI,java.net.URL, + java.io.File, + java.util.HashMap, + java.util.List, + java.util.Collection, + java.util.Map, + java.util.Set, + java.util.concurrent.ConcurrentHashMap, + java.util.LinkedHashMap, + java.util.TreeMap + + org.apache.avro + + + + + + + + + + + @project.groupId@ + @project.artifactId@ + @project.version@ + + + + + org.xerial.snappy + snappy-java + @snappy.version@ + true + + + + org.tukaani + xz + @tukaani.version@ + true + + + + com.github.luben + zstd-jni + @zstd-jni.version@ + true + + + + + org.junit.vintage + junit-vintage-engine + @junit5.version@ + test + + + + org.junit.jupiter + junit-jupiter + @junit5.version@ + test + + + + org.hamcrest + hamcrest-library + @hamcrest.version@ + test + + + + org.mockito + mockito-core + @mockito.version@ + test + + + + + diff --git a/lang/java/avro/src/it/settings.xml b/lang/java/avro/src/it/settings.xml new file mode 100644 index 00000000000..2e3d2e38201 --- /dev/null +++ b/lang/java/avro/src/it/settings.xml @@ -0,0 +1,51 @@ + + + + + + it-repo + + true + + + + local.central + @localRepositoryUrl@ + + true + + + true + + + + + + local.central + @localRepositoryUrl@ + + true + + + true + + + + + + diff --git a/lang/java/avro/src/main/java/org/apache/avro/CanonicalSchemaFormatterFactory.java b/lang/java/avro/src/main/java/org/apache/avro/CanonicalSchemaFormatterFactory.java new file mode 100644 index 00000000000..8ddec8155a4 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/CanonicalSchemaFormatterFactory.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +/** + * Schema formatter factory that supports the "Parsing Canonical Form". + * + * @see Specification: + * Parsing Canonical Form for Schemas + */ +public class CanonicalSchemaFormatterFactory implements SchemaFormatterFactory, SchemaFormatter { + @Override + public SchemaFormatter getDefaultFormatter() { + return this; + } + + @Override + public String format(Schema schema) { + return SchemaNormalization.toParsingForm(schema); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/Conversion.java b/lang/java/avro/src/main/java/org/apache/avro/Conversion.java index 4ae75f4a5cb..934672e7d30 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Conversion.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Conversion.java @@ -21,6 +21,9 @@ import java.nio.ByteBuffer; import java.util.Collection; import java.util.Map; +import java.util.ServiceLoader; + +import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericEnumSymbol; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.IndexedRecord; @@ -28,23 +31,33 @@ /** * Conversion between generic and logical type instances. *

- * Instances of this class are added to GenericData to convert a logical type to - * a particular representation. + * Instances of this class can be added to GenericData to convert a logical type + * to a particular representation. This can be done manually, using + * {@link GenericData#addLogicalTypeConversion(Conversion)}, or automatically. + * This last option uses the Java {@link ServiceLoader}, and requires the + * implementation to be a public class with a public no-arg constructor, be + * named in a file called {@code /META-INF/services/org.apache.avro.Conversion}, + * and both must available in the classpath. *

- * Implementations must provide: * {@link #getConvertedType()}: get the Java - * class used for the logical type * {@link #getLogicalTypeName()}: get the - * logical type this implements + * Implementations must provide: + *

    + *
  • {@link #getConvertedType()}: get the Java class used for the logical + * type
  • + *
  • {@link #getLogicalTypeName()}: get the logical type this implements
  • + *
*

- * Subclasses must also override all of the conversion methods for Avro's base - * types that are valid for the logical type, or else risk causing + * Subclasses must also override the conversion methods for Avro's base types + * that are valid for the logical type, or else risk causing * {@code UnsupportedOperationException} at runtime. *

* Optionally, use {@link #getRecommendedSchema()} to provide a Schema that will - * be used when a Schema is generated for the class returned by - * {@code getConvertedType}. + * be used when generating a Schema for the class. This is useful when using + * {@code ReflectData} or {@code ProtobufData}, for example. * - * @param a Java type that generic data is converted to + * @param a Java type that can represent the named logical type + * @see ServiceLoader */ +@SuppressWarnings("unused") public abstract class Conversion { /** @@ -65,9 +78,9 @@ public abstract class Conversion { * Certain logical types may require adjusting the code within the "setter" * methods to make sure the data that is set is properly formatted. This method * allows the Conversion to generate custom setter code if required. - * - * @param varName - * @param valParamName + * + * @param varName the name of the variable holding the converted value + * @param valParamName the name of the parameter with the new converted value * @return a String for the body of the setter method */ public String adjustAndSetValue(String varName, String valParamName) { @@ -102,7 +115,7 @@ public T fromCharSequence(CharSequence value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("fromCharSequence is not supported for " + type.getName()); } - public T fromEnumSymbol(GenericEnumSymbol value, Schema schema, LogicalType type) { + public T fromEnumSymbol(GenericEnumSymbol value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("fromEnumSymbol is not supported for " + type.getName()); } @@ -150,7 +163,7 @@ public CharSequence toCharSequence(T value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("toCharSequence is not supported for " + type.getName()); } - public GenericEnumSymbol toEnumSymbol(T value, Schema schema, LogicalType type) { + public GenericEnumSymbol toEnumSymbol(T value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("toEnumSymbol is not supported for " + type.getName()); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/Conversions.java b/lang/java/avro/src/main/java/org/apache/avro/Conversions.java index 1c28c9adb81..2fa15eb959c 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Conversions.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Conversions.java @@ -18,15 +18,24 @@ package org.apache.avro; -import java.math.RoundingMode; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericEnumSymbol; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.util.TimePeriod; +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import java.math.RoundingMode; import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.IntBuffer; import java.util.Arrays; import java.util.Collection; import java.util.Map; @@ -59,6 +68,22 @@ public UUID fromCharSequence(CharSequence value, Schema schema, LogicalType type public CharSequence toCharSequence(UUID value, Schema schema, LogicalType type) { return value.toString(); } + + @Override + public UUID fromFixed(final GenericFixed value, final Schema schema, final LogicalType type) { + ByteBuffer buffer = ByteBuffer.wrap(value.bytes()); + long mostSigBits = buffer.getLong(); + long leastSigBits = buffer.getLong(); + return new UUID(mostSigBits, leastSigBits); + } + + @Override + public GenericFixed toFixed(final UUID value, final Schema schema, final LogicalType type) { + ByteBuffer buffer = ByteBuffer.allocate(2 * Long.BYTES); + buffer.putLong(value.getMostSignificantBits()); + buffer.putLong(value.getLeastSignificantBits()); + return new GenericData.Fixed(schema, buffer.array()); + } } public static class DecimalConversion extends Conversion { @@ -106,11 +131,12 @@ public GenericFixed toFixed(BigDecimal value, Schema schema, LogicalType type) { byte fillByte = (byte) (value.signum() < 0 ? 0xFF : 0x00); byte[] unscaled = value.unscaledValue().toByteArray(); byte[] bytes = new byte[schema.getFixedSize()]; - int offset = bytes.length - unscaled.length; + int unscaledLength = unscaled.length; + int offset = bytes.length - unscaledLength; - // Fill the front of the array and copy remaining with unscaled values + // Fill the front with the filler and copy the unscaled value into the remainder Arrays.fill(bytes, 0, offset, fillByte); - System.arraycopy(unscaled, 0, bytes, offset, bytes.length - offset); + System.arraycopy(unscaled, 0, bytes, offset, unscaledLength); return new GenericData.Fixed(schema, bytes); } @@ -146,8 +172,104 @@ private static BigDecimal validate(final LogicalTypes.Decimal decimal, BigDecima } } + public static class BigDecimalConversion extends Conversion { + + @Override + public Class getConvertedType() { + return BigDecimal.class; + } + + @Override + public String getLogicalTypeName() { + return "big-decimal"; + } + + @Override + public BigDecimal fromBytes(final ByteBuffer value, final Schema schema, final LogicalType type) { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(value.array(), null); + + try { + BigInteger bg = null; + ByteBuffer buffer = decoder.readBytes(null); + byte[] array = buffer.array(); + if (array.length > 0) { + bg = new BigInteger(array); + } + + int scale = decoder.readInt(); + return new BigDecimal(bg, scale); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public ByteBuffer toBytes(final BigDecimal value, final Schema schema, final LogicalType type) { + try { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); + + BigInteger unscaledValue = value.unscaledValue(); + if (unscaledValue != null) { + encoder.writeBytes(unscaledValue.toByteArray()); + } else { + encoder.writeBytes(new byte[] {}); + } + encoder.writeInt(value.scale()); + encoder.flush(); + return ByteBuffer.wrap(out.toByteArray()); + + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + + @Override + public Schema getRecommendedSchema() { + return LogicalTypes.bigDecimal().addToSchema(Schema.create(Schema.Type.BYTES)); + } + } + + public static class DurationConversion extends Conversion { + @Override + public Class getConvertedType() { + return TimePeriod.class; + } + + @Override + public String getLogicalTypeName() { + return "duration"; + } + + @Override + public Schema getRecommendedSchema() { + return LogicalTypes.duration().addToSchema(Schema.createFixed("time.Duration", + "A 12-byte byte array encoding a duration in months, days and milliseconds.", null, 12)); + } + + @Override + public TimePeriod fromFixed(GenericFixed value, Schema schema, LogicalType type) { + IntBuffer buffer = ByteBuffer.wrap(value.bytes()).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer(); + long months = Integer.toUnsignedLong(buffer.get()); + long days = Integer.toUnsignedLong(buffer.get()); + long millis = Integer.toUnsignedLong(buffer.get()); + return TimePeriod.of(months, days, millis); + } + + @Override + public GenericFixed toFixed(TimePeriod value, Schema schema, LogicalType type) { + ByteBuffer buffer = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN); + IntBuffer intBuffer = buffer.asIntBuffer(); + intBuffer.put((int) value.getMonths()); + intBuffer.put((int) value.getDays()); + intBuffer.put((int) value.getMillis()); + return new GenericData.Fixed(schema, buffer.array()); + } + } + /** - * Convert a underlying representation of a logical type (such as a ByteBuffer) + * Convert an underlying representation of a logical type (such as a ByteBuffer) * to a higher level object (such as a BigDecimal). * * @param datum The object to be converted. @@ -157,9 +279,9 @@ private static BigDecimal validate(final LogicalTypes.Decimal decimal, BigDecima * @param conversion The tool used to finish the conversion. Cannot be null if * datum is not null. * @return The result object, which is a high level object of the logical type. - * If a null datum is passed in, a null value will be returned. - * @throws IllegalArgumentException if a null schema, type or conversion is - * passed in while datum is not null. + * The null datum always converts to a null value. + * @throws IllegalArgumentException if datum is not null, but schema, type or + * conversion is. */ public static Object convertToLogicalType(Object datum, Schema schema, LogicalType type, Conversion conversion) { if (datum == null) { @@ -176,9 +298,9 @@ public static Object convertToLogicalType(Object datum, Schema schema, LogicalTy case RECORD: return conversion.fromRecord((IndexedRecord) datum, schema, type); case ENUM: - return conversion.fromEnumSymbol((GenericEnumSymbol) datum, schema, type); + return conversion.fromEnumSymbol((GenericEnumSymbol) datum, schema, type); case ARRAY: - return conversion.fromArray((Collection) datum, schema, type); + return conversion.fromArray((Collection) datum, schema, type); case MAP: return conversion.fromMap((Map) datum, schema, type); case FIXED: @@ -201,13 +323,13 @@ public static Object convertToLogicalType(Object datum, Schema schema, LogicalTy return datum; } catch (ClassCastException e) { throw new AvroRuntimeException( - "Cannot convert " + datum + ":" + datum.getClass().getSimpleName() + ": expected generic type", e); + "Cannot convert " + datum + ':' + datum.getClass().getSimpleName() + ": expected generic type", e); } } /** * Convert a high level representation of a logical type (such as a BigDecimal) - * to the its underlying representation object (such as a ByteBuffer) + * to its underlying representation object (such as a ByteBuffer) * * @param datum The object to be converted. * @param schema The schema of datum. Cannot be null if datum is not null. @@ -218,8 +340,8 @@ public static Object convertToLogicalType(Object datum, Schema schema, LogicalTy * @return The result object, which is an underlying representation object of * the logical type. If the input param datum is null, a null value will * be returned. - * @throws IllegalArgumentException if a null schema, type or conversion is - * passed in while datum is not null. + * @throws IllegalArgumentException if datum is not null, but schema, type or + * conversion is. */ public static Object convertToRawType(Object datum, Schema schema, LogicalType type, Conversion conversion) { if (datum == null) { @@ -262,7 +384,7 @@ public static Object convertToRawType(Object datum, Schema schema, LogicalTy return datum; } catch (ClassCastException e) { throw new AvroRuntimeException( - "Cannot convert " + datum + ":" + datum.getClass().getSimpleName() + ": expected logical type", e); + "Cannot convert " + datum + ':' + datum.getClass().getSimpleName() + ": expected logical type", e); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java new file mode 100644 index 00000000000..c37eca15dc6 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import java.io.IOException; +import java.net.URI; + +/** + * Schema parser for a specific schema format. + * + *

+ * The {@link SchemaParser} class uses this interface, supporting text based + * schema sources. + *

+ * + *

+ * Implementations are located using a {@link java.util.ServiceLoader} and must + * therefore be threadsafe. See the {@code ServiceLoader} class for details on + * loading your implementation. + *

+ * + * @see java.util.ServiceLoader + */ +public interface FormattedSchemaParser { + /** + *

+ * Parse schema definitions from a text based source. + *

+ * + *

Notes for implementers:

+ * + *
    + *
  • Schema definitions are expected not to be in the format the parser + * expects. So when the input clearly doesn't make sense (e.g., reading "/**" + * when expecting JSON), it is a good idea not to do anything (especially + * calling methods on the @code ParseContext}).
  • + *
  • The parameter {@code parseContext} is not thread-safe.
  • + *
  • All named schema definitions that are parsed should be added to the + * provided {@link ParseContext}.
  • + *
  • Optionally, you may return a "main" schema. Some schema definitions have + * one, for example the schema defined by the root of the JSON document in a + * standard schema + * definition. If unsure, return {@code null}.
  • + *
  • If parsing fails, throw a {@link SchemaParseException}. This will let the + * parsing process recover and continue.
  • + *
  • Throwing anything other than a {@code SchemaParseException} will abort + * the parsing process, so reserve that for rethrowing exceptions.
  • + *
+ * + * @param parseContext the current parse context: all named schemata that are + * parsed should be added here, otherwise resolving + * schemata can fail; contains all previously known types + * @param baseUri the base location of the schema, or {@code null} if + * not known + * @param formattedSchema the text of the schema definition(s) to parse + * @return the main schema, if any + * @throws IOException when the schema cannot be read + * @throws SchemaParseException when the schema cannot be parsed + */ + Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) + throws IOException, SchemaParseException; +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java b/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java index e8c33a42d31..5384b8595fc 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java +++ b/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java @@ -24,16 +24,18 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Queue; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentMap; import java.io.IOException; +import java.util.function.BiConsumer; import org.apache.avro.util.internal.Accessor; import org.apache.avro.util.internal.Accessor.JsonPropertiesAccessor; -import org.apache.avro.reflect.MapEntry; +import org.apache.avro.util.MapEntry; import org.apache.avro.util.internal.JacksonUtils; import com.fasterxml.jackson.core.JsonGenerator; @@ -148,9 +150,9 @@ private Null() { // Also, we only ever ADD to the collection, never changing a value, so // putWithAbsent is the // only modifier - private ConcurrentMap props = new ConcurrentHashMap() { + private final ConcurrentMap props = new ConcurrentHashMap<>() { private static final long serialVersionUID = 1L; - private Queue> propOrder = new ConcurrentLinkedQueue<>(); + private final Queue> propOrder = new ConcurrentLinkedQueue<>(); @Override public JsonNode putIfAbsent(String key, JsonNode value) { @@ -168,10 +170,10 @@ public JsonNode put(String key, JsonNode value) { @Override public Set> entrySet() { - return new AbstractSet>() { + return new AbstractSet<>() { @Override public Iterator> iterator() { - return new Iterator>() { + return new Iterator<>() { Iterator> it = propOrder.iterator(); @Override @@ -194,7 +196,7 @@ public int size() { } }; - private Set reserved; + private final Set reserved; JsonProperties(Set reserved) { this.reserved = reserved; @@ -204,7 +206,7 @@ public int size() { this.reserved = reserved; for (Entry a : propMap.entrySet()) { Object v = a.getValue(); - JsonNode json = null; + JsonNode json; if (v instanceof String) { json = TextNode.valueOf((String) v); } else if (v instanceof JsonNode) { @@ -241,6 +243,11 @@ public Object getObjectProp(String name) { return JacksonUtils.toObject(props.get(name)); } + public Object getObjectProp(String name, Object defaultValue) { + final JsonNode json = props.get(name); + return json != null ? JacksonUtils.toObject(json) : defaultValue; + } + /** * Adds a property with the given name name and value value. * Neither name nor value can be null. It is illegal @@ -307,6 +314,17 @@ public Map getObjectProps() { return Collections.unmodifiableMap(result); } + public boolean propsContainsKey(String key) { + return this.props.containsKey(key); + } + + public void forEachProperty(BiConsumer consumer) { + for (Map.Entry entry : this.props.entrySet()) { + final Object value = JacksonUtils.toObject(entry.getValue()); + consumer.accept(entry.getKey(), value); + } + } + void writeProps(JsonGenerator gen) throws IOException { for (Map.Entry e : props.entrySet()) gen.writeObjectField(e.getKey(), e.getValue()); @@ -317,7 +335,7 @@ int propsHashCode() { } boolean propsEqual(JsonProperties np) { - return props.equals(np.props); + return Objects.equals(props, np.props); } public boolean hasProps() { diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatter.java b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatter.java new file mode 100644 index 00000000000..5d372658611 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatter.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +public class JsonSchemaFormatter implements SchemaFormatter { + private final boolean prettyPrinted; + + public JsonSchemaFormatter(boolean prettyPrinted) { + this.prettyPrinted = prettyPrinted; + } + + @Override + public String format(Schema schema) { + // TODO: Move the toString implementation here and have Schema#toString() + // use SchemaFormatter with the formats "json/pretty" and "json/inline" + return schema.toString(prettyPrinted); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatterFactory.java b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatterFactory.java new file mode 100644 index 00000000000..915a671ebd7 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatterFactory.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +public class JsonSchemaFormatterFactory implements SchemaFormatterFactory { + + public static final String VARIANT_NAME_PRETTY = "pretty"; + public static final String VARIANT_NAME_INLINE = "inline"; + + @Override + public SchemaFormatter getDefaultFormatter() { + return getFormatterForVariant(VARIANT_NAME_PRETTY); + } + + @Override + public SchemaFormatter getFormatterForVariant(String variantName) { + if (VARIANT_NAME_PRETTY.equals(variantName)) { + return new JsonSchemaFormatter(true); + } else if (VARIANT_NAME_INLINE.equals(variantName)) { + return new JsonSchemaFormatter(false); + } else { + throw new AvroRuntimeException("Unknown JSON variant: " + variantName); + } + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java new file mode 100644 index 00000000000..5dd532444a3 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import java.io.IOException; +import java.net.URI; + +/** + * Schema parser for JSON formatted schemata. This initial implementation simply + * delegates to the {@link Schema.Parser} class, though it should be refactored + * out of there. + * + *

+ * Note: this class is intentionally not available via the Java + * {@link java.util.ServiceLoader}, as its use is hardcoded as fallback when no + * service exists. This enables users to reliably override the standard JSON + * parser as well. + *

+ */ +public class JsonSchemaParser implements FormattedSchemaParser { + /** + *

+ * Parse a schema written in the internal (JSON) format without any validations. + *

+ * + *

+ * Using this method is only safe if used to parse a write schema (i.e., a + * schema used to read Avro data). Other usages, for example by generated Avro + * code, can cause interoperability problems. + *

+ * + *

+ * Use with care and sufficient testing! + *

+ * + * @param fragments one or more strings making up the schema (some schemata + * exceed the compiler limits) + * @return the parsed schema + */ + public static Schema parseInternal(String... fragments) { + StringBuilder buffer = new StringBuilder(); + for (String fragment : fragments) { + buffer.append(fragment); + } + + boolean saved = Schema.getValidateDefaults(); + try { + Schema.setValidateDefaults(false); + ParseContext context = new ParseContext(NameValidator.NO_VALIDATION); + Schema schema = new JsonSchemaParser().parse(context, buffer, true); + context.commit(); + context.resolveAllSchemas(); + return context.resolve(schema); + } finally { + Schema.setValidateDefaults(saved); + } + } + + @Override + public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) + throws IOException, SchemaParseException { + return parse(parseContext, formattedSchema, false); + } + + private Schema parse(ParseContext parseContext, CharSequence formattedSchema, boolean allowInvalidDefaults) + throws SchemaParseException { + Schema.Parser parser = new Schema.Parser(parseContext); + if (allowInvalidDefaults) { + parser.setValidateDefaults(false); + } + return parser.parseInternal(formattedSchema.toString()); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java b/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java index 5b03e1524fc..d5b246b4b7d 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java +++ b/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java @@ -18,18 +18,35 @@ package org.apache.avro; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.util.Collections; import java.util.Map; import java.util.Objects; +import java.util.ServiceLoader; import java.util.concurrent.ConcurrentHashMap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - public class LogicalTypes { private static final Logger LOG = LoggerFactory.getLogger(LogicalTypes.class); + /** + * Factory interface and SPI for logical types. A {@code LogicalTypeFactory} can + * be registered in two ways: + * + *
    + *
  1. Manually, via {@link #register(LogicalTypeFactory)} or + * {@link #register(String, LogicalTypeFactory)}
  2. + * + *
  3. Automatically, when the {@code LogicalTypeFactory} implementation is a + * public class with a public no-arg constructor, is named in a file called + * {@code /META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory}, + * and both are available in the classpath
  4. + *
+ * + * @see ServiceLoader + */ public interface LogicalTypeFactory { LogicalType fromSchema(Schema schema); @@ -40,6 +57,12 @@ default String getTypeName() { private static final Map REGISTERED_TYPES = new ConcurrentHashMap<>(); + static { + for (LogicalTypeFactory logicalTypeFactory : ServiceLoader.load(LogicalTypeFactory.class)) { + register(logicalTypeFactory); + } + } + /** * Register a logical type. * @@ -114,6 +137,12 @@ private static LogicalType fromSchemaImpl(Schema schema, boolean throwErrors) { case DECIMAL: logicalType = new Decimal(schema); break; + case BIG_DECIMAL: + logicalType = BIG_DECIMAL_TYPE; + break; + case DURATION: + logicalType = DURATION_TYPE; + break; case UUID: logicalType = UUID_TYPE; break; @@ -123,6 +152,9 @@ private static LogicalType fromSchemaImpl(Schema schema, boolean throwErrors) { case TIMESTAMP_MICROS: logicalType = TIMESTAMP_MICROS_TYPE; break; + case TIMESTAMP_NANOS: + logicalType = TIMESTAMP_NANOS_TYPE; + break; case TIME_MILLIS: logicalType = TIME_MILLIS_TYPE; break; @@ -135,6 +167,9 @@ private static LogicalType fromSchemaImpl(Schema schema, boolean throwErrors) { case LOCAL_TIMESTAMP_MILLIS: logicalType = LOCAL_TIMESTAMP_MILLIS_TYPE; break; + case LOCAL_TIMESTAMP_NANOS: + logicalType = LOCAL_TIMESTAMP_NANOS_TYPE; + break; default: final LogicalTypeFactory typeFactory = REGISTERED_TYPES.get(typeName); logicalType = (typeFactory == null) ? null : typeFactory.fromSchema(schema); @@ -159,14 +194,18 @@ private static LogicalType fromSchemaImpl(Schema schema, boolean throwErrors) { } private static final String DECIMAL = "decimal"; + private static final String BIG_DECIMAL = "big-decimal"; + private static final String DURATION = "duration"; private static final String UUID = "uuid"; private static final String DATE = "date"; private static final String TIME_MILLIS = "time-millis"; private static final String TIME_MICROS = "time-micros"; private static final String TIMESTAMP_MILLIS = "timestamp-millis"; private static final String TIMESTAMP_MICROS = "timestamp-micros"; + private static final String TIMESTAMP_NANOS = "timestamp-nanos"; private static final String LOCAL_TIMESTAMP_MILLIS = "local-timestamp-millis"; private static final String LOCAL_TIMESTAMP_MICROS = "local-timestamp-micros"; + private static final String LOCAL_TIMESTAMP_NANOS = "local-timestamp-nanos"; /** Create a Decimal LogicalType with the given precision and scale 0 */ public static Decimal decimal(int precision) { @@ -178,12 +217,25 @@ public static Decimal decimal(int precision, int scale) { return new Decimal(precision, scale); } - private static final LogicalType UUID_TYPE = new LogicalType("uuid"); + private static final BigDecimal BIG_DECIMAL_TYPE = new BigDecimal(); + + /** Create a Big Decimal LogicalType that can accept any precision and scale */ + public static BigDecimal bigDecimal() { + return BIG_DECIMAL_TYPE; + } + + private static final LogicalType UUID_TYPE = new Uuid(); public static LogicalType uuid() { return UUID_TYPE; } + private static final LogicalType DURATION_TYPE = new Duration(); + + public static LogicalType duration() { + return DURATION_TYPE; + } + private static final Date DATE_TYPE = new Date(); public static Date date() { @@ -214,6 +266,12 @@ public static TimestampMicros timestampMicros() { return TIMESTAMP_MICROS_TYPE; } + private static final TimestampNanos TIMESTAMP_NANOS_TYPE = new TimestampNanos(); + + public static TimestampNanos timestampNanos() { + return TIMESTAMP_NANOS_TYPE; + } + private static final LocalTimestampMillis LOCAL_TIMESTAMP_MILLIS_TYPE = new LocalTimestampMillis(); public static LocalTimestampMillis localTimestampMillis() { @@ -226,6 +284,50 @@ public static LocalTimestampMicros localTimestampMicros() { return LOCAL_TIMESTAMP_MICROS_TYPE; } + private static final LocalTimestampNanos LOCAL_TIMESTAMP_NANOS_TYPE = new LocalTimestampNanos(); + + public static LocalTimestampNanos localTimestampNanos() { + return LOCAL_TIMESTAMP_NANOS_TYPE; + } + + /** Uuid represents a uuid without a time */ + public static class Uuid extends LogicalType { + + private static final int UUID_BYTES = 2 * Long.BYTES; + + private Uuid() { + super(UUID); + } + + @Override + public void validate(Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.STRING && schema.getType() != Schema.Type.FIXED) { + throw new IllegalArgumentException("Uuid can only be used with an underlying string or fixed type"); + } + if (schema.getType() == Schema.Type.FIXED && schema.getFixedSize() != UUID_BYTES) { + throw new IllegalArgumentException("Uuid with fixed type must have a size of " + UUID_BYTES + " bytes"); + } + } + } + + /** + * Duration represents a duration, consisting on months, days and milliseconds + */ + public static class Duration extends LogicalType { + private Duration() { + super(DURATION); + } + + @Override + public void validate(Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.FIXED || schema.getFixedSize() != 12) { + throw new IllegalArgumentException("Duration can only be used with an underlying fixed type of size 12."); + } + } + } + /** Decimal represents arbitrary-precision fixed-scale decimal numbers */ public static class Decimal extends LogicalType { private static final String PRECISION_PROP = "precision"; @@ -306,7 +408,7 @@ private long maxPrecision(Schema schema) { } private boolean hasProperty(Schema schema, String name) { - return (schema.getObjectProp(name) != null); + return schema.propsContainsKey(name); } private int getInt(Schema schema, String name) { @@ -340,6 +442,20 @@ public int hashCode() { } } + public static class BigDecimal extends LogicalType { + private BigDecimal() { + super(BIG_DECIMAL); + } + + @Override + public void validate(final Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.BYTES) { + throw new IllegalArgumentException("BigDecimal can only be used with an underlying bytes type"); + } + } + } + /** Date represents a date without a time */ public static class Date extends LogicalType { private Date() { @@ -415,6 +531,21 @@ public void validate(Schema schema) { } } + /** TimestampNanos represents a date and time in nanoseconds */ + public static class TimestampNanos extends LogicalType { + private TimestampNanos() { + super(TIMESTAMP_NANOS); + } + + @Override + public void validate(Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.LONG) { + throw new IllegalArgumentException("Timestamp (nanos) can only be used with an underlying long type"); + } + } + } + public static class LocalTimestampMillis extends LogicalType { private LocalTimestampMillis() { super(LOCAL_TIMESTAMP_MILLIS); @@ -443,4 +574,18 @@ public void validate(Schema schema) { } } + public static class LocalTimestampNanos extends LogicalType { + private LocalTimestampNanos() { + super(LOCAL_TIMESTAMP_NANOS); + } + + @Override + public void validate(Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.LONG) { + throw new IllegalArgumentException("Local timestamp (micros) can only be used with an underlying long type"); + } + } + } + } diff --git a/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java b/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java new file mode 100644 index 00000000000..f1262d922cf --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +public interface NameValidator { + + class Result { + private final String errors; + + public Result(final String errors) { + this.errors = errors; + } + + public boolean isOK() { + return this == NameValidator.OK; + } + + public String getErrors() { + return errors; + } + } + + Result OK = new Result(null); + + default Result validate(String name) { + return OK; + } + + NameValidator NO_VALIDATION = new NameValidator() { + }; + + NameValidator UTF_VALIDATOR = new NameValidator() { + @Override + public Result validate(final String name) { + if (name == null) { + return new Result("Null name"); + } + int length = name.length(); + if (length == 0) { + return new Result("Empty name"); + } + char first = name.charAt(0); + if (!(Character.isLetter(first) || first == '_')) { + return new Result("Illegal initial character: " + name); + } + for (int i = 1; i < length; i++) { + char c = name.charAt(i); + if (!(Character.isLetterOrDigit(c) || c == '_')) { + return new Result("Illegal character in: " + name); + } + } + return OK; + } + }; + + NameValidator STRICT_VALIDATOR = new NameValidator() { + @Override + public Result validate(final String name) { + if (name == null) { + return new Result("Null name"); + } + int length = name.length(); + if (length == 0) { + return new Result("Empty name"); + } + char first = name.charAt(0); + if (!(isLetter(first) || first == '_')) { + return new Result("Illegal initial character: " + name); + } + for (int i = 1; i < length; i++) { + char c = name.charAt(i); + if (!(isLetter(c) || isDigit(c) || c == '_')) { + return new Result("Illegal character in: " + name); + } + } + return OK; + } + + private boolean isLetter(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + } + + private boolean isDigit(char c) { + return c >= '0' && c <= '9'; + } + + }; + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java b/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java new file mode 100644 index 00000000000..1d0873eb8b1 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java @@ -0,0 +1,351 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.apache.avro.util.SchemaResolver; +import org.apache.avro.util.Schemas; + +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static java.util.Objects.requireNonNull; + +/** + * Class to define a name context, useful to reference schemata with. This + * allows for the following: + * + *
    + *
  • Collect new named schemata.
  • + *
  • Find schemata by name, including primitives.
  • + *
  • Find schemas that do not exist yet.
  • + *
  • Resolve references to schemas that didn't exist yet when first used.
  • + *
+ * + *

+ * This class is NOT thread-safe. + *

+ * + *

+ * Note: this class has no use for most Avro users, but is a key component when + * implementing a schema parser. + *

+ * + * @see JSON based + * schema definition + **/ +public class ParseContext { + private static final Map PRIMITIVES = new HashMap<>(); + + static { + PRIMITIVES.put("string", Schema.Type.STRING); + PRIMITIVES.put("bytes", Schema.Type.BYTES); + PRIMITIVES.put("int", Schema.Type.INT); + PRIMITIVES.put("long", Schema.Type.LONG); + PRIMITIVES.put("float", Schema.Type.FLOAT); + PRIMITIVES.put("double", Schema.Type.DOUBLE); + PRIMITIVES.put("boolean", Schema.Type.BOOLEAN); + PRIMITIVES.put("null", Schema.Type.NULL); + } + + private static final Set NAMED_SCHEMA_TYPES = EnumSet.of(Schema.Type.RECORD, Schema.Type.ENUM, + Schema.Type.FIXED); + /** + * Collection of old schemata. Can contain unresolved references if !isResolved. + */ + private final Map oldSchemas; + /** + * Collection of new schemata. Can contain unresolved references. + */ + private final Map newSchemas; + /** + * The name validator to use. + */ + // Visible for use in JsonSchemaParser + final NameValidator nameValidator; + /** + * Visitor that was used to resolve schemata with. If not available, some + * schemata in {@code oldSchemas} may not be fully resolved. If available, all + * schemata in {@code oldSchemas} are resolved, and {@code newSchemas} is empty. + * After visiting a schema, it can return the corresponding resolved schema for + * a schema that possibly contains unresolved references. + */ + private SchemaResolver.ResolvingVisitor resolvingVisitor; + + /** + * Create a {@code ParseContext} for the default/{@code null} namespace, using + * default name validation for new schemata. + */ + public ParseContext() { + this(NameValidator.UTF_VALIDATOR); + } + + /** + * Create a {@code ParseContext} using the specified name validation for new + * schemata. + */ + public ParseContext(NameValidator nameValidator) { + this(requireNonNull(nameValidator), new LinkedHashMap<>(), new LinkedHashMap<>()); + } + + private ParseContext(NameValidator nameValidator, Map oldSchemas, Map newSchemas) { + this.nameValidator = nameValidator; + this.oldSchemas = oldSchemas; + this.newSchemas = newSchemas; + resolvingVisitor = null; + } + + /** + * Tell whether this context contains a schema with the given name. + * + * @param name a schema name + * @return {@code true} if the context contains a schema with this name, + * {@code false} otherwise + */ + public boolean contains(String name) { + return PRIMITIVES.containsKey(name) || oldSchemas.containsKey(name) || newSchemas.containsKey(name); + } + + /** + *

+ * Find a schema by name and namespace. + *

+ * + *

+ * That is: + *

+ * + *
    + *
  1. If {@code name} is a primitive name, return a (new) schema for it
  2. + *
  3. Otherwise, determine the full schema name (using the given + * {@code namespace} if necessary), and find it
  4. + *
  5. If no schema was found and {@code name} is a simple name, find the schema + * in the default (null) namespace
  6. + *
  7. If still no schema was found, return an unresolved reference for the full + * schema name (see step 2)
  8. + *
+ * + *

+ * Note: as an unresolved reference might be returned, the schema is not + * directly usable. Please {@link #put(Schema)} the schema using it in the + * context. The {@link SchemaParser} and protocol parsers will ensure you'll + * only get a resolved schema that is usable. + *

+ * + * @param name the schema name to find + * @param namespace the namespace to find the schema against + * @return the schema, or an unresolved reference + */ + public Schema find(String name, String namespace) { + Schema.Type type = PRIMITIVES.get(name); + if (type != null) { + return Schema.create(type); + } + + String fullName = fullName(name, namespace); + Schema schema = getNamedSchema(fullName); + if (schema == null) { + schema = getNamedSchema(name); + } + + return schema != null ? schema : SchemaResolver.unresolvedSchema(fullName); + } + + private String fullName(String name, String namespace) { + if (namespace != null && name.lastIndexOf('.') < 0) { + return namespace + "." + name; + } + return name; + } + + /** + * Get a schema by name. Note that the schema might not (yet) be resolved/usable + * until {@link #resolveAllSchemas()} has been called. + * + * @param fullName a full schema name + * @return the schema, if known + */ + public Schema getNamedSchema(String fullName) { + Schema schema = oldSchemas.get(fullName); + if (schema == null) { + schema = newSchemas.get(fullName); + } + return schema; + } + + /** + * Put the schema into this context. This is an idempotent operation: it only + * fails if this context already has a different schema with the same name. + * + *

+ * Note that although this method works for all types except for arrays, maps + * and unions, all primitive types have already been defined upon construction. + * This means you cannot redefine a 'long' with a logical timestamp type. + *

+ * + * @param schema the schema to put into the context + */ + public void put(Schema schema) { + if (!(NAMED_SCHEMA_TYPES.contains(schema.getType()))) { + throw new AvroTypeException("You can only put a named schema into the context"); + } + + String fullName = requireValidFullName(schema.getFullName()); + + Schema alreadyKnownSchema = oldSchemas.get(fullName); + if (alreadyKnownSchema != null) { + if (!schema.equals(alreadyKnownSchema)) { + throw new SchemaParseException("Can't redefine: " + fullName); + } + } else { + resolvingVisitor = null; + Schema previouslyAddedSchema = newSchemas.putIfAbsent(fullName, schema); + if (previouslyAddedSchema != null && !previouslyAddedSchema.equals(schema)) { + throw new SchemaParseException("Can't redefine: " + fullName); + } + } + } + + private String requireValidFullName(String fullName) { + String[] names = fullName.split("\\."); + for (int i = 0; i < names.length - 1; i++) { + validateName(names[i], "Namespace part"); + } + validateName(names[names.length - 1], "Name"); + return fullName; + } + + private void validateName(String name, String typeOfName) { + NameValidator.Result result = nameValidator.validate(name); + if (!result.isOK()) { + throw new SchemaParseException(typeOfName + " \"" + name + "\" is invalid: " + result.getErrors()); + } + } + + public boolean hasNewSchemas() { + return !newSchemas.isEmpty(); + } + + public void commit() { + oldSchemas.putAll(newSchemas); + newSchemas.clear(); + } + + public SchemaParser.ParseResult commit(Schema mainSchema) { + List parsedNamedSchemas = new ArrayList<>(newSchemas.values()); + SchemaParser.ParseResult parseResult = new SchemaParser.ParseResult() { + @Override + public Schema mainSchema() { + return mainSchema == null ? null : resolve(mainSchema); + } + + @Override + public List parsedNamedSchemas() { + return parsedNamedSchemas.stream().map(ParseContext.this::resolve).collect(Collectors.toList()); + } + }; + commit(); + return parseResult; + } + + public void rollback() { + newSchemas.clear(); + } + + /** + * Resolve all (named) schemas that were parsed. This resolves all forward + * references, even if parsed from different files. Note: the context must be + * committed for this method to work. + * + * @return all parsed schemas + * @throws AvroTypeException if a schema reference cannot be resolved + */ + public List resolveAllSchemas() { + ensureSchemasAreResolved(); + + return new ArrayList<>(oldSchemas.values()); + } + + private void ensureSchemasAreResolved() { + if (hasNewSchemas()) { + throw new IllegalStateException("Schemas cannot be resolved unless the ParseContext is committed."); + } + if (resolvingVisitor == null) { + NameValidator saved = Schema.getNameValidator(); + try { + // Ensure we use the same validation when copying schemas as when they were + // defined. + Schema.setNameValidator(nameValidator); + SchemaResolver.ResolvingVisitor visitor = new SchemaResolver.ResolvingVisitor(oldSchemas::get); + oldSchemas.values().forEach(schema -> Schemas.visit(schema, visitor)); + // Before this point is where we can get exceptions due to resolving failures. + for (Map.Entry entry : oldSchemas.entrySet()) { + entry.setValue(visitor.getResolved(entry.getValue())); + } + resolvingVisitor = visitor; + } finally { + Schema.setNameValidator(saved); + } + } + } + + /** + * Resolve unresolved references in a schema that was parsed for this + * context using the types known to this context. Note: this method will + * ensure all known schemas are resolved, or throw, and thus requires the + * context to be committed. + * + * @param schema the schema resolve + * @return the fully resolved schema + * @throws AvroTypeException if a schema reference cannot be resolved + */ + public Schema resolve(Schema schema) { + ensureSchemasAreResolved(); + + // As all (named) schemas are resolved now, we know: + // — All named types are either in oldSchemas or unknown. + // — All unnamed types can be visited&resolved without validation. + + if (NAMED_SCHEMA_TYPES.contains(schema.getType()) && schema.getFullName() != null) { + return requireNonNull(oldSchemas.get(schema.getFullName()), () -> "Unknown schema: " + schema.getFullName()); + } else { + // Unnamed or anonymous schema + // (protocol message request parameters are anonymous records) + Schemas.visit(schema, resolvingVisitor); // This field is set, as ensureSchemasAreResolved(); was called. + return resolvingVisitor.getResolved(schema); + } + } + + /** + * Return all known types by their fullname. Warning: this returns all types, + * even uncommitted ones, including unresolved references! + * + * @return a map of all types by their name + */ + public Map typesByName() { + LinkedHashMap result = new LinkedHashMap<>(); + result.putAll(oldSchemas); + result.putAll(newSchemas); + return result; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/Protocol.java b/lang/java/avro/src/main/java/org/apache/avro/Protocol.java index 6987d4c8f54..3404b93d4a5 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Protocol.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Protocol.java @@ -19,28 +19,27 @@ import java.io.ByteArrayInputStream; import java.io.File; +import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.nio.charset.StandardCharsets; -import java.io.IOException; import java.security.MessageDigest; import java.util.ArrayList; -import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Collection; -import java.util.Collections; +import java.util.Objects; import java.util.Set; -import java.util.HashSet; - -import org.apache.avro.Schema.Field; -import org.apache.avro.Schema.Field.Order; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.JsonNode; +import org.apache.avro.Schema.Field; +import org.apache.avro.Schema.Field.Order; /** * A set of messages forming an application protocol. @@ -71,17 +70,15 @@ public class Protocol extends JsonProperties { public static final long VERSION = 1; // Support properties for both Protocol and Message objects - private static final Set MESSAGE_RESERVED = Collections - .unmodifiableSet(new HashSet<>(Arrays.asList("doc", "response", "request", "errors", "one-way"))); + private static final Set MESSAGE_RESERVED = Set.of("doc", "response", "request", "errors", "one-way"); - private static final Set FIELD_RESERVED = Collections - .unmodifiableSet(new HashSet<>(Arrays.asList("name", "type", "doc", "default", "aliases"))); + private static final Set FIELD_RESERVED = Set.of("name", "type", "doc", "default", "aliases"); /** A protocol message. */ public class Message extends JsonProperties { - private String name; - private String doc; - private Schema request; + private final String name; + private final String doc; + private final Schema request; /** Construct a message. */ private Message(String name, String doc, JsonProperties propMap, Schema request) { @@ -132,7 +129,7 @@ public String toString() { try { StringWriter writer = new StringWriter(); JsonGenerator gen = Schema.FACTORY.createGenerator(writer); - toJson(gen); + toJson(new HashSet<>(), gen); gen.flush(); return writer.toString(); } catch (IOException e) { @@ -140,19 +137,19 @@ public String toString() { } } - void toJson(JsonGenerator gen) throws IOException { + void toJson(Set knownNames, JsonGenerator gen) throws IOException { gen.writeStartObject(); if (doc != null) gen.writeStringField("doc", doc); writeProps(gen); // write out properties gen.writeFieldName("request"); - request.fieldsToJson(types, gen); + request.fieldsToJson(knownNames, namespace, gen); - toJson1(gen); + toJson1(knownNames, gen); gen.writeEndObject(); } - void toJson1(JsonGenerator gen) throws IOException { + void toJson1(Set knownNames, JsonGenerator gen) throws IOException { gen.writeStringField("response", "null"); gen.writeBooleanField("one-way", true); } @@ -175,12 +172,11 @@ public int hashCode() { public String getDoc() { return doc; } - } - private class TwoWayMessage extends Message { - private Schema response; - private Schema errors; + private final class TwoWayMessage extends Message { + private final Schema response; + private final Schema errors; /** Construct a message. */ private TwoWayMessage(String name, String doc, Map propMap, Schema request, Schema response, @@ -228,15 +224,15 @@ public int hashCode() { } @Override - void toJson1(JsonGenerator gen) throws IOException { + void toJson1(Set knownNames, JsonGenerator gen) throws IOException { gen.writeFieldName("response"); - response.toJson(types, gen); + response.toJson(knownNames, namespace, gen); List errs = errors.getTypes(); // elide system error if (errs.size() > 1) { Schema union = Schema.createUnion(errs.subList(1, errs.size())); gen.writeFieldName("errors"); - union.toJson(types, gen); + union.toJson(knownNames, namespace, gen); } } @@ -246,7 +242,7 @@ void toJson1(JsonGenerator gen) throws IOException { private String namespace; private String doc; - private Schema.Names types = new Schema.Names(); + private ParseContext context = new ParseContext(); private final Map messages = new LinkedHashMap<>(); private byte[] md5; @@ -256,8 +252,8 @@ void toJson1(JsonGenerator gen) throws IOException { /** Union type for generating system errors. */ public static final Schema SYSTEM_ERRORS = Schema.createUnion(Collections.singletonList(SYSTEM_ERROR)); - private static final Set PROTOCOL_RESERVED = Collections - .unmodifiableSet(new HashSet<>(Arrays.asList("namespace", "protocol", "doc", "messages", "types", "errors"))); + private static final Set PROTOCOL_RESERVED = Set.of("namespace", "protocol", "doc", "messages", "types", + "errors"); private Protocol() { super(PROTOCOL_RESERVED); @@ -268,6 +264,7 @@ private Protocol() { * {@code doc}, and {@code namespace} as {code p} has. It also copies all the * {@code props}. */ + @SuppressWarnings("CopyConstructorMissesField") public Protocol(Protocol p) { this(p.getName(), p.getDoc(), p.getNamespace()); putAll(p); @@ -275,15 +272,28 @@ public Protocol(Protocol p) { public Protocol(String name, String doc, String namespace) { super(PROTOCOL_RESERVED); - this.name = name; + setName(name, namespace); this.doc = doc; - this.namespace = namespace; } public Protocol(String name, String namespace) { this(name, null, namespace); } + private void setName(String name, String namespace) { + int lastDot = name.lastIndexOf('.'); + if (lastDot < 0) { + this.name = name; + this.namespace = namespace; + } else { + this.name = name.substring(lastDot + 1); + this.namespace = name.substring(0, lastDot); + } + if (this.namespace != null && this.namespace.isEmpty()) { + this.namespace = null; + } + } + /** The name of this protocol. */ public String getName() { return name; @@ -301,19 +311,30 @@ public String getDoc() { /** The types of this protocol. */ public Collection getTypes() { - return types.values(); + return context.resolveAllSchemas(); + } + + /** @deprecated can return invalid schemata: do NOT use! */ + @Deprecated + public Collection getUnresolvedTypes() { + return context.typesByName().values(); } /** Returns the named type. */ public Schema getType(String name) { - return types.get(name); + Schema namedSchema = null; + if (!name.contains(".")) { + namedSchema = context.getNamedSchema(namespace + "." + name); + } + return namedSchema != null ? namedSchema : context.getNamedSchema(name); } /** Set the types of this protocol. */ public void setTypes(Collection newTypes) { - types = new Schema.Names(); + context = new ParseContext(); for (Schema s : newTypes) - types.add(s); + context.put(s); + context.commit(); } /** The messages of this protocol. */ @@ -332,16 +353,16 @@ public Message createMessage(String name, String doc, Schema request) { * {@code props} of {@code m}. */ public Message createMessage(Message m, Schema request) { - return new Message(name, doc, m, request); + return new Message(m.name, m.doc, m, request); } /** Create a one-way message. */ - public Message createMessage(String name, String doc, JsonProperties propMap, Schema request) { + public Message createMessage(String name, String doc, JsonProperties propMap, Schema request) { return new Message(name, doc, propMap, request); } /** Create a one-way message. */ - public Message createMessage(String name, String doc, Map propMap, Schema request) { + public Message createMessage(String name, String doc, Map propMap, Schema request) { return new Message(name, doc, propMap, request); } @@ -360,13 +381,13 @@ public Message createMessage(Message m, Schema request, Schema response, Schema } /** Create a two-way message. */ - public Message createMessage(String name, String doc, JsonProperties propMap, Schema request, Schema response, + public Message createMessage(String name, String doc, JsonProperties propMap, Schema request, Schema response, Schema errors) { return new TwoWayMessage(name, doc, propMap, request, response, errors); } /** Create a two-way message. */ - public Message createMessage(String name, String doc, Map propMap, Schema request, Schema response, + public Message createMessage(String name, String doc, Map propMap, Schema request, Schema response, Schema errors) { return new TwoWayMessage(name, doc, propMap, request, response, errors); } @@ -378,13 +399,14 @@ public boolean equals(Object o) { if (!(o instanceof Protocol)) return false; Protocol that = (Protocol) o; - return this.name.equals(that.name) && this.namespace.equals(that.namespace) && this.types.equals(that.types) - && this.messages.equals(that.messages) && this.propsEqual(that); + return Objects.equals(this.name, that.name) && Objects.equals(this.namespace, that.namespace) + && Objects.equals(this.context.resolveAllSchemas(), that.context.resolveAllSchemas()) + && Objects.equals(this.messages, that.messages) && this.propsEqual(that); } @Override public int hashCode() { - return name.hashCode() + namespace.hashCode() + types.hashCode() + messages.hashCode() + propsHashCode(); + return 31 * Objects.hash(name, namespace, context, messages) + propsHashCode(); } /** Render this as JSON. */ @@ -413,26 +435,26 @@ public String toString(boolean pretty) { } void toJson(JsonGenerator gen) throws IOException { - types.space(namespace); - gen.writeStartObject(); gen.writeStringField("protocol", name); - gen.writeStringField("namespace", namespace); + if (namespace != null) { + gen.writeStringField("namespace", namespace); + } if (doc != null) gen.writeStringField("doc", doc); writeProps(gen); gen.writeArrayFieldStart("types"); - Schema.Names resolved = new Schema.Names(namespace); - for (Schema type : types.values()) - if (!resolved.contains(type)) - type.toJson(resolved, gen); + Set knownNames = new HashSet<>(); + for (Schema type : context.resolveAllSchemas()) + if (!knownNames.contains(type.getFullName())) + type.toJson(knownNames, namespace, gen); gen.writeEndArray(); gen.writeObjectFieldStart("messages"); for (Map.Entry e : messages.entrySet()) { gen.writeFieldName(e.getKey()); - e.getValue().toJson(gen); + e.getValue().toJson(knownNames, gen); } gen.writeEndObject(); gen.writeEndObject(); @@ -451,7 +473,9 @@ public byte[] getMD5() { /** Read a protocol from a Json file. */ public static Protocol parse(File file) throws IOException { - return parse(Schema.FACTORY.createParser(file)); + try (JsonParser jsonParser = Schema.FACTORY.createParser(file)) { + return parse(jsonParser); + } } /** Read a protocol from a Json stream. */ @@ -487,20 +511,43 @@ private static Protocol parse(JsonParser parser) { } private void parse(JsonNode json) { - parseNamespace(json); - parseName(json); + parseNameAndNamespace(json); parseTypes(json); parseMessages(json); parseDoc(json); parseProps(json); + + context.commit(); + context.resolveAllSchemas(); + resolveMessageSchemata(); + } + + private void resolveMessageSchemata() { + for (Map.Entry entry : messages.entrySet()) { + Message oldValue = entry.getValue(); + Message newValue; + if (oldValue.isOneWay()) { + newValue = createMessage(oldValue.getName(), oldValue.getDoc(), oldValue, + context.resolve(oldValue.getRequest())); + } else { + Schema request = context.resolve(oldValue.getRequest()); + Schema response = context.resolve(oldValue.getResponse()); + Schema errors = context.resolve(oldValue.getErrors()); + newValue = createMessage(oldValue.getName(), oldValue.getDoc(), oldValue, request, response, errors); + } + entry.setValue(newValue); + } } - private void parseNamespace(JsonNode json) { - JsonNode nameNode = json.get("namespace"); - if (nameNode == null) - return; // no namespace defined - this.namespace = nameNode.textValue(); - types.space(this.namespace); + private void parseNameAndNamespace(JsonNode json) { + JsonNode nameNode = json.get("protocol"); + if (nameNode == null) { + throw new SchemaParseException("No protocol name specified: " + json); + } + JsonNode namespaceNode = json.get("namespace"); + String namespace = namespaceNode == null ? null : namespaceNode.textValue(); + + setName(nameNode.textValue(), namespace); } private void parseDoc(JsonNode json) { @@ -514,23 +561,17 @@ private String parseDocNode(JsonNode json) { return nameNode.textValue(); } - private void parseName(JsonNode json) { - JsonNode nameNode = json.get("protocol"); - if (nameNode == null) - throw new SchemaParseException("No protocol name specified: " + json); - this.name = nameNode.textValue(); - } - private void parseTypes(JsonNode json) { JsonNode defs = json.get("types"); if (defs == null) return; // no types defined if (!defs.isArray()) throw new SchemaParseException("Types not an array: " + defs); + for (JsonNode type : defs) { if (!type.isObject()) throw new SchemaParseException("Type not an object: " + type); - Schema.parse(type, types); + Schema.parse(type, context, namespace); } } @@ -578,8 +619,8 @@ private Message parseMessage(String messageName, JsonNode json) { JsonNode fieldDocNode = field.get("doc"); if (fieldDocNode != null) fieldDoc = fieldDocNode.textValue(); - Field newField = new Field(name, Schema.parse(fieldTypeNode, types), fieldDoc, field.get("default"), true, - Order.ASCENDING); + Field newField = new Field(name, Schema.parse(fieldTypeNode, context, namespace), fieldDoc, field.get("default"), + true, Order.ASCENDING); Set aliases = Schema.parseAliases(field); if (aliases != null) { // add aliases for (String alias : aliases) @@ -594,7 +635,7 @@ private Message parseMessage(String messageName, JsonNode json) { } fields.add(newField); } - Schema request = Schema.createRecord(fields); + Schema request = Schema.createRecord(null, null, null, false, fields); boolean oneWay = false; JsonNode oneWayNode = json.get("one-way"); @@ -613,12 +654,12 @@ private Message parseMessage(String messageName, JsonNode json) { if (oneWay) { if (decls != null) throw new SchemaParseException("one-way can't have errors: " + json); - if (responseNode != null && Schema.parse(responseNode, types).getType() != Schema.Type.NULL) + if (responseNode != null && Schema.parse(responseNode, context, namespace).getType() != Schema.Type.NULL) throw new SchemaParseException("One way response must be null: " + json); return new Message(messageName, doc, mProps, request); } - Schema response = Schema.parse(responseNode, types); + Schema response = Schema.parse(responseNode, context, namespace); List errs = new ArrayList<>(); errs.add(SYSTEM_ERROR); // every method can throw @@ -627,7 +668,7 @@ private Message parseMessage(String messageName, JsonNode json) { throw new SchemaParseException("Errors not an array: " + json); for (JsonNode decl : decls) { String name = decl.textValue(); - Schema schema = this.types.get(name); + Schema schema = this.context.find(name, namespace); if (schema == null) throw new SchemaParseException("Undefined error: " + name); if (!schema.isError()) @@ -642,5 +683,4 @@ private Message parseMessage(String messageName, JsonNode json) { public static void main(String[] args) throws Exception { System.out.println(Protocol.parse(new File(args[0]))); } - } diff --git a/lang/java/avro/src/main/java/org/apache/avro/Resolver.java b/lang/java/avro/src/main/java/org/apache/avro/Resolver.java index 182a91bdff6..8b62b24d757 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Resolver.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Resolver.java @@ -90,7 +90,7 @@ private static Action resolve(Schema w, Schema r, GenericData d, Map wsymbols = w.getEnumSymbols(); @@ -443,7 +435,7 @@ public static class RecordAdjust extends Action { * fields that will be read from the writer: these n are in the order * dictated by writer's schema. The remaining m fields will be read from * default values (actions for these default values are found in - * {@link RecordAdjust#defaults}. + * {@link RecordAdjust#defaults}). */ public final Field[] readerOrder; diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java b/lang/java/avro/src/main/java/org/apache/avro/Schema.java index 89d0b32061d..87f9d045db6 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java @@ -25,12 +25,21 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.DoubleNode; import com.fasterxml.jackson.databind.node.NullNode; +import org.apache.avro.path.TracingAvroTypeException; +import org.apache.avro.util.internal.Accessor; +import org.apache.avro.util.internal.Accessor.FieldAccessor; +import org.apache.avro.util.internal.JacksonUtils; +import org.apache.avro.util.internal.ThreadLocalWithInitial; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -38,7 +47,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.IdentityHashMap; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; @@ -46,11 +54,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; -import org.apache.avro.util.internal.Accessor; -import org.apache.avro.util.internal.Accessor.FieldAccessor; -import org.apache.avro.util.internal.JacksonUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.stream.Collectors; import static org.apache.avro.LogicalType.LOGICAL_TYPE_PROP; @@ -75,9 +79,9 @@ *
  • null. * * - * A schema can be constructed using one of its static createXXX - * methods, or more conveniently using {@link SchemaBuilder}. The schema objects - * are logically immutable. There are only two mutating methods - + * Construct a schema using one of its static createXXX methods, or + * more conveniently using {@link SchemaBuilder}. The schema objects are + * logically immutable. There are only two mutating methods - * {@link #setFields(List)} and {@link #addProp(String, String)}. The following * restrictions apply on these two methods. *
      @@ -88,6 +92,7 @@ * property. *
    */ +@SuppressWarnings("unused") public abstract class Schema extends JsonProperties implements Serializable { private static final long serialVersionUID = 1L; @@ -105,7 +110,10 @@ private static final class SerializableSchema implements Serializable { private String schemaString; private Object readResolve() { - return new Schema.Parser().parse(schemaString); + // The schema can be built using any validation, which we cannot reproduce. + // Assuming best practice precautions when using serialized data, we can + // safely enough disable validation here. + return JsonSchemaParser.parseInternal(schemaString); } } @@ -120,20 +128,20 @@ private Object readResolve() { FACTORY.setCodec(MAPPER); } - /** The type of a schema. */ + /** The type of schema. */ public enum Type { RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL; private final String name; - private Type() { + Type() { this.name = this.name().toLowerCase(Locale.ENGLISH); } public String getName() { return name; } - }; + } private final Type type; private LogicalType logicalType = null; @@ -201,9 +209,9 @@ void setLogicalType(LogicalType logicalType) { * Create an anonymous record schema. * * @deprecated This method allows to create Schema objects that cannot be parsed - * by {@link Schema.Parser#parse(String)}. It will be removed in a - * future version of Avro. Better use - * i{@link #createRecord(String, String, String, boolean, List)} to + * by {@link SchemaParser#parse(CharSequence)}. It will be removed + * in a future version of Avro. Better use + * {@link #createRecord(String, String, String, boolean, List)} to * produce a fully qualified Schema. */ @Deprecated @@ -268,7 +276,7 @@ public Type getType() { * fieldName. If there is no field by that name, a null is * returned. */ - public Field getField(String fieldname) { + public Field getField(String fieldName) { throw new AvroRuntimeException("Not a record: " + this); } @@ -280,6 +288,13 @@ public List getFields() { throw new AvroRuntimeException("Not a record: " + this); } + /** + * If this is a record, returns whether the fields have been set. + */ + public boolean hasFields() { + throw new AvroRuntimeException("Not a record: " + this); + } + /** * If this is a record, set its fields. The fields can be set only once in a * schema. @@ -382,7 +397,16 @@ public int getFixedSize() { throw new AvroRuntimeException("Not fixed: " + this); } - /** Render this as JSON. */ + /** + *

    + * Render this as JSON. + *

    + * + *

    + * This method is equivalent to: + * {@code SchemaFormatter.getInstance("json").format(this)} + *

    + */ @Override public String toString() { return toString(false); @@ -392,9 +416,12 @@ public String toString() { * Render this as JSON. * * @param pretty if true, pretty-print JSON. + * @deprecated Use {@link SchemaFormatter#format(Schema)} instead, using the + * format {@code json/pretty} or {@code json/inline} */ + @Deprecated public String toString(boolean pretty) { - return toString(new Names(), pretty); + return toString(new HashSet(), pretty); } /** @@ -407,30 +434,33 @@ public String toString(boolean pretty) { // Use at your own risk. This method should be removed with AVRO-2832. @Deprecated public String toString(Collection referencedSchemas, boolean pretty) { - Schema.Names names = new Schema.Names(); + Set knownNames = new HashSet<>(); if (referencedSchemas != null) { for (Schema s : referencedSchemas) { - names.add(s); + knownNames.add(s.getFullName()); } } - return toString(names, pretty); + return toString(knownNames, pretty); } - String toString(Names names, boolean pretty) { + @Deprecated + String toString(Set knownNames, boolean pretty) { try { StringWriter writer = new StringWriter(); - JsonGenerator gen = FACTORY.createGenerator(writer); - if (pretty) - gen.useDefaultPrettyPrinter(); - toJson(names, gen); - gen.flush(); - return writer.toString(); + try (JsonGenerator gen = FACTORY.createGenerator(writer)) { + if (pretty) + gen.useDefaultPrettyPrinter(); + toJson(knownNames, null, gen); + gen.flush(); + return writer.toString(); + } } catch (IOException e) { throw new AvroRuntimeException(e); } } - void toJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void toJson(Set knownNames, String namespace, JsonGenerator gen) throws IOException { if (!hasProps()) { // no props defined gen.writeString(getName()); // just write name } else { @@ -441,7 +471,8 @@ void toJson(Names names, JsonGenerator gen) throws IOException { } } - void fieldsToJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void fieldsToJson(Set knownNames, String namespace, JsonGenerator gen) throws IOException { throw new AvroRuntimeException("Not a record: " + this); } @@ -475,12 +506,12 @@ final boolean equalCachedHash(Schema other) { private static final Set FIELD_RESERVED = Collections .unmodifiableSet(new HashSet<>(Arrays.asList("default", "doc", "name", "order", "type", "aliases"))); - /** Returns true if this record is an union type. */ + /** Returns true if this record is a union type. */ public boolean isUnion() { return this instanceof UnionSchema; } - /** Returns true if this record is an union type containing null. */ + /** Returns true if this record is a union type containing null. */ public boolean isNullable() { if (!isUnion()) { return getType().equals(Schema.Type.NULL); @@ -524,10 +555,10 @@ public enum Order { private final String name; - private Order() { + Order() { this.name = this.name().toLowerCase(Locale.ENGLISH); } - }; + } /** * For Schema unions with a "null" type as the first entry, this can be used to @@ -546,7 +577,7 @@ private Order() { Field(String name, Schema schema, String doc, JsonNode defaultValue, boolean validateDefault, Order order) { super(FIELD_RESERVED); this.name = validateName(name); - this.schema = schema; + this.schema = Objects.requireNonNull(schema, "schema is required and cannot be null"); this.doc = doc; this.defaultValue = validateDefault ? validateDefault(name, schema, defaultValue) : defaultValue; this.order = Objects.requireNonNull(order, "Order cannot be null"); @@ -569,14 +600,14 @@ public Field(Field field, Schema schema) { * */ public Field(String name, Schema schema) { - this(name, schema, (String) null, (JsonNode) null, true, Order.ASCENDING); + this(name, schema, null, null, true, Order.ASCENDING); } /** * */ public Field(String name, Schema schema, String doc) { - this(name, schema, doc, (JsonNode) null, true, Order.ASCENDING); + this(name, schema, doc, null, true, Order.ASCENDING); } /** @@ -601,7 +632,7 @@ public Field(String name, Schema schema, String doc, Object defaultValue, Order public String name() { return name; - }; + } /** The position of this field within the record. */ public int pos() { @@ -703,7 +734,7 @@ public Name(String name, String space) { this.name = validateName(name); } else { // qualified name space = name.substring(0, lastDot); // get space from name - this.name = validateName(name.substring(lastDot + 1, name.length())); + this.name = validateName(name.substring(lastDot + 1)); } if ("".equals(space)) space = null; @@ -731,20 +762,44 @@ public String toString() { return full; } - public void writeName(Names names, JsonGenerator gen) throws IOException { + public void writeName(String currentNamespace, JsonGenerator gen) throws IOException { if (name != null) gen.writeStringField("name", name); if (space != null) { - if (!space.equals(names.space())) + if (!space.equals(currentNamespace)) gen.writeStringField("namespace", space); - } else if (names.space() != null) { // null within non-null + } else if (currentNamespace != null) { // null within non-null gen.writeStringField("namespace", ""); } } public String getQualified(String defaultSpace) { - return (space == null || space.equals(defaultSpace)) ? name : full; + return this.shouldWriteFull(defaultSpace) ? full : name; + } + + /** + * Determine if full name must be written. There are 2 cases for true : + * {@code defaultSpace} != from {@code this.space} or name is already a + * {@code Schema.Type} (int, array, ...) + * + * @param defaultSpace : default name space. + * @return true if full name must be written. + */ + private boolean shouldWriteFull(String defaultSpace) { + if (space != null && space.equals(defaultSpace)) { + for (Type schemaType : Type.values()) { + if (schemaType.name.equals(name)) { + // name is a 'Type', so namespace must be written + return true; + } + } + // this.space == defaultSpace + return false; + } + // this.space != defaultSpace, so namespace must be written. + return true; } + } private static abstract class NamedSchema extends Schema { @@ -800,22 +855,25 @@ public Set getAliases() { Set result = new LinkedHashSet<>(); if (aliases != null) for (Name alias : aliases) - result.add(alias.full); + if (alias.space == null && name.space != null) + result.add("." + alias.name); + else + result.add(alias.full); return result; } - public boolean writeNameRef(Names names, JsonGenerator gen) throws IOException { - if (this.equals(names.get(name))) { - gen.writeString(name.getQualified(names.space())); - return true; - } else if (name.name != null) { - names.put(name, this); + public boolean writeNameRef(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { + if (name.name != null) { + if (!knownNames.add(name.full)) { + gen.writeString(name.getQualified(currentNamespace)); + return true; + } } return false; } - public void writeName(Names names, JsonGenerator gen) throws IOException { - name.writeName(names, gen); + public void writeName(String currentNamespace, JsonGenerator gen) throws IOException { + name.writeName(currentNamespace, gen); } public boolean equalNames(NamedSchema that) { @@ -828,7 +886,7 @@ int computeHash() { } public void aliasesToJson(JsonGenerator gen) throws IOException { - if (aliases == null || aliases.size() == 0) + if (aliases == null || aliases.isEmpty()) return; gen.writeFieldName("aliases"); gen.writeStartArray(); @@ -844,8 +902,8 @@ public void aliasesToJson(JsonGenerator gen) throws IOException { * and need to watch for recursion. */ public static class SeenPair { - private Object s1; - private Object s2; + private final Object s1; + private final Object s2; public SeenPair(Object s1, Object s2) { this.s1 = s1; @@ -864,10 +922,9 @@ public int hashCode() { } } - private static final ThreadLocal SEEN_EQUALS = ThreadLocal.withInitial(HashSet::new); - private static final ThreadLocal SEEN_HASHCODE = ThreadLocal.withInitial(IdentityHashMap::new); + private static final ThreadLocal> SEEN_EQUALS = ThreadLocalWithInitial.of(HashSet::new); + private static final ThreadLocal> SEEN_HASHCODE = ThreadLocalWithInitial.of(IdentityHashMap::new); - @SuppressWarnings(value = "unchecked") private static class RecordSchema extends NamedSchema { private List fields; private Map fieldMap; @@ -890,10 +947,10 @@ public boolean isError() { } @Override - public Field getField(String fieldname) { + public Field getField(String fieldName) { if (fieldMap == null) throw new AvroRuntimeException("Schema fields not set yet"); - return fieldMap.get(fieldname); + return fieldMap.get(fieldName); } @Override @@ -903,6 +960,11 @@ public List getFields() { return fields; } + @Override + public boolean hasFields() { + return fields != null; + } + @Override public void setFields(List fields) { if (this.fields != null) { @@ -940,7 +1002,7 @@ public boolean equals(Object o) { return false; if (!propsEqual(that)) return false; - Set seen = SEEN_EQUALS.get(); + Set seen = SEEN_EQUALS.get(); SeenPair here = new SeenPair(this, o); if (seen.contains(here)) return true; // prevent stack overflow @@ -956,7 +1018,7 @@ public boolean equals(Object o) { @Override int computeHash() { - Map seen = SEEN_HASHCODE.get(); + Map seen = SEEN_HASHCODE.get(); if (seen.containsKey(this)) return 0; // prevent stack overflow boolean first = seen.isEmpty(); @@ -970,36 +1032,36 @@ int computeHash() { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { - if (writeNameRef(names, gen)) + @Deprecated + void toJson(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { + if (writeNameRef(knownNames, currentNamespace, gen)) return; - String savedSpace = names.space; // save namespace gen.writeStartObject(); gen.writeStringField("type", isError ? "error" : "record"); - writeName(names, gen); - names.space = name.space; // set default namespace - if (getDoc() != null) - gen.writeStringField("doc", getDoc()); + writeName(currentNamespace, gen); + if (this.getDoc() != null) { + gen.writeStringField("doc", this.getDoc()); + } if (fields != null) { gen.writeFieldName("fields"); - fieldsToJson(names, gen); + fieldsToJson(knownNames, name.space, gen); } writeProps(gen); aliasesToJson(gen); gen.writeEndObject(); - names.space = savedSpace; // restore namespace } @Override - void fieldsToJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void fieldsToJson(Set knownNames, String namespace, JsonGenerator gen) throws IOException { gen.writeStartArray(); for (Field f : fields) { gen.writeStartObject(); gen.writeStringField("name", f.name()); gen.writeFieldName("type"); - f.schema().toJson(names, gen); + f.schema().toJson(knownNames, namespace, gen); if (f.doc() != null) gen.writeStringField("doc", f.doc()); if (f.hasDefaultValue()) { @@ -1008,7 +1070,7 @@ void fieldsToJson(Names names, JsonGenerator gen) throws IOException { } if (f.order() != Field.Order.ASCENDING) gen.writeStringField("order", f.order().name); - if (f.aliases != null && f.aliases.size() != 0) { + if (f.aliases != null && !f.aliases.isEmpty()) { gen.writeFieldName("aliases"); gen.writeStartArray(); for (String alias : f.aliases) @@ -1056,7 +1118,12 @@ public boolean hasEnumSymbol(String symbol) { @Override public int getEnumOrdinal(String symbol) { - return ordinals.get(symbol); + Integer ordinal = ordinals.get(symbol); + if (ordinal == null) { + throw new TracingAvroTypeException( + new AvroTypeException("enum value '" + symbol + "' is not in the enum symbol set: " + symbols)); + } + return ordinal; } @Override @@ -1080,12 +1147,13 @@ int computeHash() { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { - if (writeNameRef(names, gen)) + @Deprecated + void toJson(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { + if (writeNameRef(knownNames, currentNamespace, gen)) return; gen.writeStartObject(); gen.writeStringField("type", "enum"); - writeName(names, gen); + writeName(currentNamespace, gen); if (getDoc() != null) gen.writeStringField("doc", getDoc()); gen.writeArrayFieldStart("symbols"); @@ -1129,11 +1197,12 @@ int computeHash() { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void toJson(Set knownNames, String namespace, JsonGenerator gen) throws IOException { gen.writeStartObject(); gen.writeStringField("type", "array"); gen.writeFieldName("items"); - elementType.toJson(names, gen); + elementType.toJson(knownNames, namespace, gen); writeProps(gen); gen.writeEndObject(); } @@ -1168,11 +1237,12 @@ int computeHash() { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void toJson(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { gen.writeStartObject(); gen.writeStringField("type", "map"); gen.writeFieldName("values"); - valueType.toJson(names, gen); + valueType.toJson(knownNames, currentNamespace, gen); writeProps(gen); gen.writeEndObject(); } @@ -1201,6 +1271,16 @@ public UnionSchema(LockableArrayList types) { } } + /** + * Checks if a JSON value matches the schema. + * + * @param jsonValue a value to check against the schema + * @return true if the value is valid according to this schema + */ + public boolean isValidDefault(JsonNode jsonValue) { + return this.types.stream().anyMatch((Schema s) -> s.isValidDefault(jsonValue)); + } + @Override public List getTypes() { return types; @@ -1235,12 +1315,19 @@ public void addProp(String name, String value) { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void toJson(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { gen.writeStartArray(); for (Schema type : types) - type.toJson(names, gen); + type.toJson(knownNames, currentNamespace, gen); gen.writeEndArray(); } + + @Override + public String getName() { + return super.getName() + + this.getTypes().stream().map(Schema::getName).collect(Collectors.joining(", ", "[", "]")); + } } private static class FixedSchema extends NamedSchema { @@ -1248,8 +1335,7 @@ private static class FixedSchema extends NamedSchema { public FixedSchema(Name name, String doc, int size) { super(Type.FIXED, name, doc); - if (size < 0) - throw new IllegalArgumentException("Invalid fixed size: " + size); + SystemLimitException.checkMaxBytesLength(size); this.size = size; } @@ -1274,12 +1360,13 @@ int computeHash() { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { - if (writeNameRef(names, gen)) + @Deprecated + void toJson(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { + if (writeNameRef(knownNames, currentNamespace, gen)) return; gen.writeStartObject(); gen.writeStringField("type", "fixed"); - writeName(names, gen); + writeName(currentNamespace, gen); if (getDoc() != null) gen.writeStringField("doc", getDoc()); gen.writeNumberField("size", size); @@ -1343,37 +1430,48 @@ public NullSchema() { * may refer to it by name. */ public static class Parser { - private Names names = new Names(); - private boolean validate = true; + final ParseContext context; + private final NameValidator validate; private boolean validateDefaults = true; + public Parser() { + this(NameValidator.UTF_VALIDATOR); + } + + public Parser(final NameValidator validate) { + this.validate = validate != null ? validate : NameValidator.NO_VALIDATION; + context = new ParseContext(this.validate); + } + + public Parser(final ParseContext context) { + this.validate = context.nameValidator; + this.context = context; + } + /** * Adds the provided types to the set of defined, named types known to this * parser. + * + * @deprecated use addTypes(Iterable types) */ + @Deprecated public Parser addTypes(Map types) { - for (Schema s : types.values()) - names.add(s); - return this; - } - - /** Returns the set of defined, named types known to this parser. */ - public Map getTypes() { - Map result = new LinkedHashMap<>(); - for (Schema s : names.values()) - result.put(s.getFullName(), s); - return result; + return this.addTypes(types.values()); } - /** Enable or disable name validation. */ - public Parser setValidate(boolean validate) { - this.validate = validate; + /** + * Adds the provided types to the set of defined, named types known to this + * parser. + */ + public Parser addTypes(Iterable types) { + for (Schema s : types) + context.put(s); return this; } - /** True iff names are validated. True by default. */ - public boolean getValidate() { - return this.validate; + /** Returns the set of defined, named types known to this parser. */ + public Map getTypes() { + return context.typesByName(); } /** Enable or disable default value validation. */ @@ -1392,7 +1490,7 @@ public boolean getValidateDefaults() { * names known to this parser. */ public Schema parse(File file) throws IOException { - return parse(FACTORY.createParser(file)); + return parse(FACTORY.createParser(file), false, true); } /** @@ -1400,7 +1498,8 @@ public Schema parse(File file) throws IOException { * names known to this parser. The input stream stays open after the parsing. */ public Schema parse(InputStream in) throws IOException { - return parse(FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE)); + JsonParser parser = FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE); + return parse(parser, true, true); } /** Read a schema from one or more json strings */ @@ -1417,24 +1516,55 @@ public Schema parse(String s, String... more) { */ public Schema parse(String s) { try { - return parse(FACTORY.createParser(s)); + return parse(FACTORY.createParser(s), false, true); + } catch (IOException e) { + throw new SchemaParseException(e); + } + } + + public Schema parseInternal(String s) { + try { + return parse(FACTORY.createParser(s), false, false); } catch (IOException e) { throw new SchemaParseException(e); } } - private Schema parse(JsonParser parser) throws IOException { - boolean saved = validateNames.get(); + private Schema parse(JsonParser parser, boolean allowDanglingContent, boolean resolveSchema) throws IOException { + NameValidator saved = VALIDATE_NAMES.get(); boolean savedValidateDefaults = VALIDATE_DEFAULTS.get(); try { - validateNames.set(validate); + // This ensured we're using the same validation as the ParseContext. + // This is most relevant for field names. + VALIDATE_NAMES.set(validate); VALIDATE_DEFAULTS.set(validateDefaults); - return Schema.parse(MAPPER.readTree(parser), names); + JsonNode jsonNode = MAPPER.readTree(parser); + Schema schema = Schema.parse(jsonNode, context, null); + if (resolveSchema) { + context.commit(); + schema = context.resolve(schema); + } + if (!allowDanglingContent) { + String dangling; + StringWriter danglingWriter = new StringWriter(); + int numCharsReleased = parser.releaseBuffered(danglingWriter); + if (numCharsReleased == -1) { + ByteArrayOutputStream danglingOutputStream = new ByteArrayOutputStream(); + parser.releaseBuffered(danglingOutputStream); // if input isn't chars above it must be bytes + dangling = new String(danglingOutputStream.toByteArray(), StandardCharsets.UTF_8).trim(); + } else { + dangling = danglingWriter.toString().trim(); + } + if (!dangling.isEmpty()) { + throw new SchemaParseException("dangling content after end of schema: " + dangling); + } + } + return schema; } catch (JsonParseException e) { throw new SchemaParseException(e); } finally { parser.close(); - validateNames.set(saved); + VALIDATE_NAMES.set(saved); VALIDATE_DEFAULTS.set(savedValidateDefaults); } } @@ -1446,9 +1576,9 @@ private Schema parse(JsonParser parser) throws IOException { * * @param file The file to read the schema from. * @return The freshly built Schema. - * @throws IOException if there was trouble reading the contents or they are + * @throws IOException if there was trouble reading the contents, or they are * invalid - * @deprecated use {@link Schema.Parser} instead. + * @deprecated use {@link SchemaParser} instead. */ @Deprecated public static Schema parse(File file) throws IOException { @@ -1461,9 +1591,9 @@ public static Schema parse(File file) throws IOException { * * @param in The input stream to read the schema from. * @return The freshly built Schema. - * @throws IOException if there was trouble reading the contents or they are + * @throws IOException if there was trouble reading the contents, or they are * invalid - * @deprecated use {@link Schema.Parser} instead. + * @deprecated use {@link SchemaParser} instead. */ @Deprecated public static Schema parse(InputStream in) throws IOException { @@ -1473,7 +1603,7 @@ public static Schema parse(InputStream in) throws IOException { /** * Construct a schema from JSON text. * - * @deprecated use {@link Schema.Parser} instead. + * @deprecated use {@link SchemaParser} instead. */ @Deprecated public static Schema parse(String jsonSchema) { @@ -1484,11 +1614,12 @@ public static Schema parse(String jsonSchema) { * Construct a schema from JSON text. * * @param validate true if names should be validated, false if not. - * @deprecated use {@link Schema.Parser} instead. + * @deprecated use {@link SchemaParser} instead. */ @Deprecated public static Schema parse(String jsonSchema, boolean validate) { - return new Parser().setValidate(validate).parse(jsonSchema); + final NameValidator validator = validate ? NameValidator.UTF_VALIDATOR : NameValidator.NO_VALIDATION; + return new Parser(validator).parse(jsonSchema); } static final Map PRIMITIVES = new HashMap<>(); @@ -1545,43 +1676,81 @@ public void add(Schema schema) { @Override public Schema put(Name name, Schema schema) { - if (containsKey(name)) - throw new SchemaParseException("Can't redefine: " + name); + if (containsKey(name)) { + final Schema other = super.get(name); + if (!Objects.equals(other, schema)) { + throw new SchemaParseException("Can't redefine: " + name); + } else { + return schema; + } + } return super.put(name, schema); } } - private static ThreadLocal validateNames = ThreadLocal.withInitial(() -> true); + private static final ThreadLocal VALIDATE_NAMES = ThreadLocalWithInitial + .of(() -> NameValidator.UTF_VALIDATOR); private static String validateName(String name) { - if (!validateNames.get()) - return name; // not validating names - if (name == null) - throw new SchemaParseException("Null name"); - int length = name.length(); - if (length == 0) - throw new SchemaParseException("Empty name"); - char first = name.charAt(0); - if (!(Character.isLetter(first) || first == '_')) - throw new SchemaParseException("Illegal initial character: " + name); - for (int i = 1; i < length; i++) { - char c = name.charAt(i); - if (!(Character.isLetterOrDigit(c) || c == '_')) - throw new SchemaParseException("Illegal character in: " + name); + NameValidator.Result result = VALIDATE_NAMES.get().validate(name); + if (!result.isOK()) { + throw new SchemaParseException(result.getErrors()); } return name; } - private static final ThreadLocal VALIDATE_DEFAULTS = ThreadLocal.withInitial(() -> true); + /* + * @deprecated Scheduled for removal. Do Not Use! + */ + @Deprecated + public static void setNameValidator(final NameValidator validator) { + Schema.VALIDATE_NAMES.set(validator); + } + + /* + * @deprecated Scheduled for removal. Do Not Use! + */ + @Deprecated + public static NameValidator getNameValidator() { + return Schema.VALIDATE_NAMES.get(); + } + + private static final ThreadLocal VALIDATE_DEFAULTS = ThreadLocalWithInitial.of(() -> true); private static JsonNode validateDefault(String fieldName, Schema schema, JsonNode defaultValue) { - if (VALIDATE_DEFAULTS.get() && (defaultValue != null) && !isValidDefault(schema, defaultValue)) { // invalid default + if (VALIDATE_DEFAULTS.get() && (defaultValue != null) && !schema.isValidDefault(defaultValue)) { // invalid default String message = "Invalid default for field " + fieldName + ": " + defaultValue + " not a " + schema; throw new AvroTypeException(message); // throw exception } return defaultValue; } + /* + * @deprecated Scheduled for removal. Do Not Use! + */ + @Deprecated + public static void setValidateDefaults(boolean validateDefaults) { + Schema.VALIDATE_DEFAULTS.set(validateDefaults); + } + + /* + * @deprecated Scheduled for removal. Do Not Use! + */ + @Deprecated + public static boolean getValidateDefaults() { + return Schema.VALIDATE_DEFAULTS.get(); + } + + /** + * Checks if a JSON value matches the schema. + * + * @param jsonValue a value to check against the schema + * @return true if the value is valid according to this schema + */ + public boolean isValidDefault(JsonNode jsonValue) { + return isValidDefault(this, jsonValue); + } + private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { if (defaultValue == null) return false; @@ -1616,13 +1785,13 @@ private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { if (!isValidDefault(schema.getValueType(), value)) return false; return true; - case UNION: // union default: first branch - return isValidDefault(schema.getTypes().get(0), defaultValue); + case UNION: // union default: any branch + return schema.getTypes().stream().anyMatch((Schema s) -> isValidValue(s, defaultValue)); case RECORD: if (!defaultValue.isObject()) return false; for (Field field : schema.getFields()) - if (!isValidDefault(field.schema(), + if (!isValidValue(field.schema(), defaultValue.has(field.name()) ? defaultValue.get(field.name()) : field.defaultValue())) return false; return true; @@ -1631,143 +1800,214 @@ private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { } } + /** + * Validate a value against the schema. + * + * @param schema : schema for value. + * @param value : value to validate. + * @return true if ok. + */ + private static boolean isValidValue(Schema schema, JsonNode value) { + if (value == null) + return false; + if (schema.isUnion()) { + // For Union, only need that one sub schema is ok. + for (Schema sub : schema.getTypes()) { + if (Schema.isValidDefault(sub, value)) { + return true; + } + } + return false; + } else { + // for other types, same as validate default. + return Schema.isValidDefault(schema, value); + } + } + /** @see #parse(String) */ - static Schema parse(JsonNode schema, Names names) { + static Schema parse(JsonNode schema, ParseContext context, String currentNameSpace) { if (schema == null) { throw new SchemaParseException("Cannot parse schema"); - } - if (schema.isTextual()) { // name - Schema result = names.get(schema.textValue()); - if (result == null) - throw new SchemaParseException("Undefined name: " + schema); - return result; + } else if (schema.isTextual()) { // name + return context.find(schema.textValue(), currentNameSpace); } else if (schema.isObject()) { - Schema result; String type = getRequiredText(schema, "type", "No type"); - Name name = null; - String savedSpace = names.space(); - String doc = null; - if (type.equals("record") || type.equals("error") || type.equals("enum") || type.equals("fixed")) { - String space = getOptionalText(schema, "namespace"); - doc = getOptionalText(schema, "doc"); - if (space == null) - space = names.space(); - name = new Name(getRequiredText(schema, "name", "No name in schema"), space); - names.space(name.space); // set default namespace - } + final boolean isTypeError = "error".equals(type); if (PRIMITIVES.containsKey(type)) { // primitive - result = create(PRIMITIVES.get(type)); - } else if (type.equals("record") || type.equals("error")) { // record - List fields = new ArrayList<>(); - result = new RecordSchema(name, doc, type.equals("error")); - if (name != null) - names.add(result); - JsonNode fieldsNode = schema.get("fields"); - if (fieldsNode == null || !fieldsNode.isArray()) - throw new SchemaParseException("Record has no fields: " + schema); - for (JsonNode field : fieldsNode) { - String fieldName = getRequiredText(field, "name", "No field name"); - String fieldDoc = getOptionalText(field, "doc"); - JsonNode fieldTypeNode = field.get("type"); - if (fieldTypeNode == null) - throw new SchemaParseException("No field type: " + field); - if (fieldTypeNode.isTextual() && names.get(fieldTypeNode.textValue()) == null) - throw new SchemaParseException(fieldTypeNode + " is not a defined name." + " The type of the \"" + fieldName - + "\" field must be" + " a defined name or a {\"type\": ...} expression."); - Schema fieldSchema = parse(fieldTypeNode, names); - Field.Order order = Field.Order.ASCENDING; - JsonNode orderNode = field.get("order"); - if (orderNode != null) - order = Field.Order.valueOf(orderNode.textValue().toUpperCase(Locale.ENGLISH)); - JsonNode defaultValue = field.get("default"); - if (defaultValue != null - && (Type.FLOAT.equals(fieldSchema.getType()) || Type.DOUBLE.equals(fieldSchema.getType())) - && defaultValue.isTextual()) - defaultValue = new DoubleNode(Double.valueOf(defaultValue.textValue())); - Field f = new Field(fieldName, fieldSchema, fieldDoc, defaultValue, true, order); - Iterator i = field.fieldNames(); - while (i.hasNext()) { // add field props - String prop = i.next(); - if (!FIELD_RESERVED.contains(prop)) - f.addProp(prop, field.get(prop)); - } - f.aliases = parseAliases(field); - fields.add(f); - if (fieldSchema.getLogicalType() == null && getOptionalText(field, LOGICAL_TYPE_PROP) != null) - LOG.warn( - "Ignored the {}.{}.logicalType property (\"{}\"). It should probably be nested inside the \"type\" for the field.", - name, fieldName, getOptionalText(field, "logicalType")); - } - result.setFields(fields); - } else if (type.equals("enum")) { // enum - JsonNode symbolsNode = schema.get("symbols"); - if (symbolsNode == null || !symbolsNode.isArray()) - throw new SchemaParseException("Enum has no symbols: " + schema); - LockableArrayList symbols = new LockableArrayList<>(symbolsNode.size()); - for (JsonNode n : symbolsNode) - symbols.add(n.textValue()); - JsonNode enumDefault = schema.get("default"); - String defaultSymbol = null; - if (enumDefault != null) - defaultSymbol = enumDefault.textValue(); - result = new EnumSchema(name, doc, symbols, defaultSymbol); - if (name != null) - names.add(result); + return parsePrimitive(schema, type); + } else if ("record".equals(type) || isTypeError) { // record + return parseRecord(schema, context, currentNameSpace, isTypeError); + } else if ("enum".equals(type)) { // enum + return parseEnum(schema, context, currentNameSpace); } else if (type.equals("array")) { // array - JsonNode itemsNode = schema.get("items"); - if (itemsNode == null) - throw new SchemaParseException("Array has no items type: " + schema); - result = new ArraySchema(parse(itemsNode, names)); + return parseArray(schema, context, currentNameSpace); } else if (type.equals("map")) { // map - JsonNode valuesNode = schema.get("values"); - if (valuesNode == null) - throw new SchemaParseException("Map has no values type: " + schema); - result = new MapSchema(parse(valuesNode, names)); - } else if (type.equals("fixed")) { // fixed - JsonNode sizeNode = schema.get("size"); - if (sizeNode == null || !sizeNode.isInt()) - throw new SchemaParseException("Invalid or no size: " + schema); - result = new FixedSchema(name, doc, sizeNode.intValue()); - if (name != null) - names.add(result); - } else { // For unions with self reference - Name nameFromType = new Name(type, names.space); - if (names.containsKey(nameFromType)) { - return names.get(nameFromType); - } - throw new SchemaParseException("Type not supported: " + type); - } - Iterator i = schema.fieldNames(); - - Set reserved = SCHEMA_RESERVED; - if (type.equals("enum")) { - reserved = ENUM_RESERVED; - } - while (i.hasNext()) { // add properties - String prop = i.next(); - if (!reserved.contains(prop)) // ignore reserved - result.addProp(prop, schema.get(prop)); - } - // parse logical type if present - result.logicalType = LogicalTypes.fromSchemaIgnoreInvalid(result); - names.space(savedSpace); // restore space - if (result instanceof NamedSchema) { - Set aliases = parseAliases(schema); - if (aliases != null) // add aliases - for (String alias : aliases) - result.addAlias(alias); + return parseMap(schema, context, currentNameSpace); + } else if ("fixed".equals(type)) { // fixed + return parseFixed(schema, context, currentNameSpace); + } else { + throw new SchemaParseException("A schema \"type\" MUST be a primitive type or one of" + + " \"enum\", \"fixed\", \"record\", \"error\", \"array\" or \"map\"."); } - return result; } else if (schema.isArray()) { // union - LockableArrayList types = new LockableArrayList<>(schema.size()); - for (JsonNode typeNode : schema) - types.add(parse(typeNode, names)); - return new UnionSchema(types); + return parseUnion(schema, context, currentNameSpace); } else { throw new SchemaParseException("Schema not yet supported: " + schema); } } + private static Schema parsePrimitive(JsonNode schema, String type) { + Schema result = create(PRIMITIVES.get(type)); + parsePropertiesAndLogicalType(schema, result, SCHEMA_RESERVED); + return result; + } + + private static Schema parseRecord(JsonNode schema, ParseContext context, String currentNameSpace, + boolean isTypeError) { + Name name = parseName(schema, currentNameSpace); + String doc = parseDoc(schema); + Schema result = new RecordSchema(name, doc, isTypeError); + + JsonNode fieldsNode = schema.get("fields"); + if (fieldsNode == null || !fieldsNode.isArray()) + throw new SchemaParseException("Record has no fields: " + schema); + List fields = new ArrayList<>(); + for (JsonNode field : fieldsNode) { + Field f = parseField(field, context, name.space); + fields.add(f); + if (f.schema().getLogicalType() == null && getOptionalText(field, LOGICAL_TYPE_PROP) != null) + LOG.warn( + "Ignored the {}.{}.logicalType property (\"{}\"). It should probably be nested inside the \"type\" for the field.", + name, f.name(), getOptionalText(field, "logicalType")); + } + result.setFields(fields); + parsePropertiesAndLogicalType(schema, result, SCHEMA_RESERVED); + parseAliases(schema, result); + context.put(result); + return result; + } + + private static Field parseField(JsonNode field, ParseContext context, String namespace) { + String fieldName = getRequiredText(field, "name", "No field name"); + String fieldDoc = parseDoc(field); + JsonNode fieldTypeNode = field.get("type"); + if (fieldTypeNode == null) + throw new SchemaParseException("No field type: " + field); + Schema fieldSchema = parse(fieldTypeNode, context, namespace); + + Field.Order order = Field.Order.ASCENDING; + JsonNode orderNode = field.get("order"); + if (orderNode != null) + order = Field.Order.valueOf(orderNode.textValue().toUpperCase(Locale.ENGLISH)); + + JsonNode defaultValue = field.get("default"); + if (defaultValue != null && (Type.FLOAT.equals(fieldSchema.getType()) || Type.DOUBLE.equals(fieldSchema.getType())) + && defaultValue.isTextual()) + defaultValue = new DoubleNode(Double.parseDouble(defaultValue.textValue())); + + Field f = new Field(fieldName, fieldSchema, fieldDoc, defaultValue, true, order); + parseProperties(field, f, FIELD_RESERVED); + f.aliases = parseAliases(field); + return f; + } + + private static Schema parseEnum(JsonNode schema, ParseContext context, String currentNameSpace) { + Name name = parseName(schema, currentNameSpace); + String doc = parseDoc(schema); + + JsonNode symbolsNode = schema.get("symbols"); + if (symbolsNode == null || !symbolsNode.isArray()) { + throw new SchemaParseException("Enum has no symbols: " + schema); + } + LockableArrayList symbols = new LockableArrayList<>(symbolsNode.size()); + for (JsonNode n : symbolsNode) + symbols.add(n.textValue()); + JsonNode enumDefault = schema.get("default"); + String defaultSymbol = null; + if (enumDefault != null) { + defaultSymbol = enumDefault.textValue(); + } + + Schema result = new EnumSchema(name, doc, symbols, defaultSymbol); + parsePropertiesAndLogicalType(schema, result, ENUM_RESERVED); + parseAliases(schema, result); + context.put(result); + return result; + } + + private static Schema parseArray(JsonNode schema, ParseContext context, String currentNameSpace) { + Schema result; + JsonNode itemsNode = schema.get("items"); + if (itemsNode == null) + throw new SchemaParseException("Array has no items type: " + schema); + result = new ArraySchema(parse(itemsNode, context, currentNameSpace)); + parsePropertiesAndLogicalType(schema, result, SCHEMA_RESERVED); + return result; + } + + private static Schema parseMap(JsonNode schema, ParseContext context, String currentNameSpace) { + Schema result; + JsonNode valuesNode = schema.get("values"); + if (valuesNode == null) + throw new SchemaParseException("Map has no values type: " + schema); + result = new MapSchema(parse(valuesNode, context, currentNameSpace)); + parsePropertiesAndLogicalType(schema, result, SCHEMA_RESERVED); + return result; + } + + private static Schema parseFixed(JsonNode schema, ParseContext context, String currentNameSpace) { + Name name = parseName(schema, currentNameSpace); + String doc = parseDoc(schema); + + JsonNode sizeNode = schema.get("size"); + if (sizeNode == null || !sizeNode.isInt()) + throw new SchemaParseException("Invalid or no size: " + schema); + + Schema result = new FixedSchema(name, doc, sizeNode.intValue()); + parsePropertiesAndLogicalType(schema, result, SCHEMA_RESERVED); + parseAliases(schema, result); + context.put(result); + return result; + } + + private static UnionSchema parseUnion(JsonNode schema, ParseContext context, String currentNameSpace) { + LockableArrayList types = new LockableArrayList<>(schema.size()); + for (JsonNode typeNode : schema) + types.add(parse(typeNode, context, currentNameSpace)); + return new UnionSchema(types); + } + + private static void parsePropertiesAndLogicalType(JsonNode jsonNode, Schema result, Set propertiesToSkip) { + parseProperties(jsonNode, result, propertiesToSkip); + // parse logical type if present + result.logicalType = LogicalTypes.fromSchemaIgnoreInvalid(result); + } + + private static void parseProperties(JsonNode schema, JsonProperties result, Set propertiesToSkip) { + schema.fieldNames().forEachRemaining(prop -> { // add properties + if (!propertiesToSkip.contains(prop)) // ignore reserved + result.addProp(prop, schema.get(prop)); + }); + } + + private static Name parseName(JsonNode schema, String currentNameSpace) { + String space = getOptionalText(schema, "namespace"); + if (space == null) + space = currentNameSpace; + return new Name(getRequiredText(schema, "name", "No name in schema"), space); + } + + private static String parseDoc(JsonNode schema) { + return getOptionalText(schema, "doc"); + } + + private static void parseAliases(JsonNode schema, Schema result) { + Set aliases = parseAliases(schema); + if (aliases != null) // add aliases + for (String alias : aliases) + result.addAlias(alias); + } + static Set parseAliases(JsonNode node) { JsonNode aliasesNode = node.get("aliases"); if (aliasesNode == null) @@ -1837,13 +2077,14 @@ public static Schema applyAliases(Schema writer, Schema reader) { Map> fieldAliases = new HashMap<>(1); getAliases(reader, seen, aliases, fieldAliases); - if (aliases.size() == 0 && fieldAliases.size() == 0) + if (aliases.isEmpty() && fieldAliases.isEmpty()) return writer; // no aliases seen.clear(); return applyAliases(writer, seen, aliases, fieldAliases); } + @SuppressWarnings("DataFlowIssue") private static Schema applyAliases(Schema s, Map seen, Map aliases, Map> fieldAliases) { @@ -1899,6 +2140,7 @@ private static Schema applyAliases(Schema s, Map seen, Map seen, Map aliases, Map> fieldAliases) { if (schema instanceof NamedSchema) { @@ -1960,10 +2202,11 @@ private static String getFieldAlias(Name record, String field, Maptrue in the lock() method. It's legal to call lock() any number of * times. Any lock() other than the first one is a no-op. * - * This class throws IllegalStateException if a mutating operation is - * performed after being locked. Since modifications through iterator also use + * If a mutating operation is performed after being locked, it throws an + * IllegalStateException. Since modifications through iterator also use * the list's mutating operations, this effectively blocks all modifications. */ + @SuppressWarnings("unused") static class LockableArrayList extends ArrayList { private static final long serialVersionUID = 1L; private boolean locked = false; @@ -1979,6 +2222,7 @@ public LockableArrayList(List types) { super(types); } + @SafeVarargs public LockableArrayList(E... types) { super(types.length); Collections.addAll(this, types); diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaBuilder.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaBuilder.java index 818f89aeb95..66fa8fbbd32 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/SchemaBuilder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaBuilder.java @@ -232,6 +232,8 @@ */ public class SchemaBuilder { + private static final ObjectMapper MAPPER = new ObjectMapper(); + private SchemaBuilder() { } @@ -2734,7 +2736,7 @@ private static JsonNode toJsonNode(Object o) { } else { s = GenericData.get().toString(o); } - return new ObjectMapper().readTree(s); + return MAPPER.readTree(s); } catch (IOException e) { throw new SchemaBuilderException(e); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java index 3e5628d9b3b..8b6a2839ad6 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java @@ -324,8 +324,10 @@ private SchemaCompatibilityResult calculateCompatibility(final Schema reader, fi // Reader compatible with all branches of a writer union is compatible if (writer.getType() == Schema.Type.UNION) { + int index = 0; for (Schema s : writer.getTypes()) { - result = result.mergedWith(getCompatibility(reader, s)); + result = result.mergedWith(getCompatibility(Integer.toString(index), reader, s, location)); + index++; } return result; } diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatter.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatter.java new file mode 100644 index 00000000000..dee5382a19f --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatter.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import java.util.Locale; +import java.util.ServiceLoader; + +/** + * Interface and factory to format schemas to text. + * + *

    + * Schema formats have a name, and optionally a variant (all lowercase). The + * Avro library supports a few formats out of the box: + *

    + * + *
    + * + *
    {@code json}
    + *
    Classic schema definition (which is a form of JSON). Supports the + * variants {@code pretty} (the default) and {@code inline}. Can be written as + * .avsc files. See the specification (Schema + * Declaration) for more details.
    + * + *
    {@code canonical}
    + *
    Parsing Canonical Form; this uniquely defines how Avro data is written. + * Used to generate schema fingerprints.
    + * See the specification (Parsing + * Canonical Form for Schemas) for more details.
    + * + *
    {@code idl}
    + *
    IDL: a format that looks much like source code, and is arguably easier to + * read than JSON. Available when the module {@code avro-idl} is on the + * classpath. See + * IDL Language + * for more details.
    + * + *
    + * + *

    + * Additional formats can be defined by implementing + * {@link SchemaFormatterFactory}. They are located using a + * {@link java.util.ServiceLoader}, which loads them using the context + * ClassLoader when available, or the application ClassLoader when not. See the + * {@code ServiceLoader} class for more details΀. + *

    + * + * @see Specification: + * Schema Declaration + * @see Specification: + * Parsing Canonical Form for Schemas + * @see IDL + * Language + * @see java.util.ServiceLoader + */ +public interface SchemaFormatter { + /** + * Get the schema formatter for the specified format name with optional variant. + * + * @param name a format with optional variant, for example "json/pretty", + * "canonical" or "idl" + * @return the schema formatter for the specified format + * @throws AvroRuntimeException if the schema format is not supported + */ + static SchemaFormatter getInstance(String name) { + int slashPos = name.indexOf("/"); + // SchemaFormatterFactory.getFormatterForVariant(String) receives the name of + // the variant in lowercase (as stated in its javadoc). We're doing a + // case-insensitive comparison on the format name instead, so we don't have to + // convert the format name provided by the factory to lower case. + // This ensures the least amount of assumptions about implementations. + String formatName = slashPos < 0 ? name : name.substring(0, slashPos); + String variantName = slashPos < 0 ? null : name.substring(slashPos + 1).toLowerCase(Locale.ROOT); + + for (SchemaFormatterFactory formatterFactory : SchemaFormatterCache.LOADER) { + if (formatName.equalsIgnoreCase(formatterFactory.formatName())) { + if (variantName == null) { + return formatterFactory.getDefaultFormatter(); + } else { + return formatterFactory.getFormatterForVariant(variantName); + } + } + } + throw new AvroRuntimeException("Unsupported schema format: " + name + "; see the javadoc for valid examples"); + } + + /** + * Format a schema with the specified format. Shorthand for + * {@code getInstance(name).format(schema)}. + * + * @param name the name of the schema format + * @param schema the schema to format + * @return the formatted schema + * @throws AvroRuntimeException if the schema format is not supported + * @see #getInstance(String) + * @see #format(Schema) + */ + static String format(String name, Schema schema) { + return getInstance(name).format(schema); + } + + /** + * Write the specified schema as a String. + * + * @param schema the schema to write + * @return the formatted schema + */ + String format(Schema schema); +} + +class SchemaFormatterCache { + static final ServiceLoader LOADER = ServiceLoader.load(SchemaFormatterFactory.class); +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatterFactory.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatterFactory.java new file mode 100644 index 00000000000..be731a86ddf --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatterFactory.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import java.util.Locale; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Service Provider Interface (SPI) for {@link SchemaFormatter}. + * + *

    + * Notes to implementers: + *

    + * + *
      + * + *
    • Implementations are located using a {@link java.util.ServiceLoader}. See + * that class for details.
    • + * + *
    • Implementing classes should either be named + * {@code SchemaFormatterFactory} (where the format is alphanumeric), or + * implement {@link #formatName()}.
    • + * + *
    • Implement at least {@link #getDefaultFormatter()}; use it to call + * {@link #getFormatterForVariant(String)} if the format supports multiple + * variants.
    • + * + *
    • Example implementations are {@link JsonSchemaFormatterFactory} and + * {@link CanonicalSchemaFormatterFactory}
    • + * + *
    + * + * @see java.util.ServiceLoader + */ +public interface SchemaFormatterFactory { + /** + * Return the name of the format this formatter factory supports. + * + *

    + * The default implementation returns the lowercase prefix of the implementing + * class if it is named {@code SchemaFormatterFactory}. That is, if the + * implementing class is named {@code some.package.JsonSchemaFormatterFactory}, + * it returns: {@literal "json"} + *

    + * + * @return the name of the format + */ + default String formatName() { + String simpleName = getClass().getSimpleName(); + Matcher matcher = SchemaFormatterFactoryConstants.SIMPLE_NAME_PATTERN.matcher(simpleName); + if (matcher.matches()) { + return matcher.group(1).toLowerCase(Locale.ROOT); + } else { + throw new AvroRuntimeException( + "Formatter is not named \"SchemaFormatterFactory\"; cannot determine format name."); + } + } + + /** + * Get the default formatter for this schema format. Instances should be + * thread-safe, as they may be cached. + * + *

    + * Implementations should either return the only formatter for this format, or + * call {@link #getFormatterForVariant(String)} with the default variant and + * implement that method as well. + *

    + * + * @return the default formatter for this schema format + */ + SchemaFormatter getDefaultFormatter(); + + /** + * Get a formatter for the specified schema format variant, if multiple variants + * are supported. Instances should be thread-safe, as they may be cached. + * + * @param variantName the name of the format variant (lower case), if specified + * @return if the factory supports the format, a schema writer; {@code null} + * otherwise + */ + default SchemaFormatter getFormatterForVariant(String variantName) { + throw new AvroRuntimeException("The schema format \"" + formatName() + "\" has no variants."); + } +} + +class SchemaFormatterFactoryConstants { + static final Pattern SIMPLE_NAME_PATTERN = Pattern.compile( + "([a-z][0-9a-z]*)" + SchemaFormatterFactory.class.getSimpleName(), + Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaParser.java new file mode 100644 index 00000000000..295cb778e3f --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaParser.java @@ -0,0 +1,345 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.apache.avro.util.UtfTextUtils; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.net.URI; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.ServiceLoader; + +/** + * Avro schema parser for text-based formats like JSON, IDL, etc. + * + *

    + * Parses formatted (i.e., text based) schemata from a given source using the + * available {@link FormattedSchemaParser} implementations, and returns the + * first result. This means it can transparently handle any schema format. The + * Avro project defines a JSON based format and an IDL format (the latter + * available as a separate dependency), but you can also provide your own. + *

    + * + *

    + * The parser can handle various text based sources. If the source contains a + * UTF encoded latin text based format it can even detect which UTF encoding was + * used (UTF-8, UTF16BE, UTF16LE, UTF-32BE or UTF32LE). + *

    + * + * @see FormattedSchemaParser + * @see UtfTextUtils + */ +public class SchemaParser { + private final ParseContext parseContext; + private final Collection formattedSchemaParsers; + + /** + *

    + * Use a default SchemaParser to parse a single schema. Equivalent to: + *

    + * + * new SchemaParser().parse(schema).mainSchema() + * + * + * @param schema the formatted schema to parse + * @return the parsed schema + * @throws SchemaParseException when the schema is invalid + * @see SchemaParser#parse(CharSequence) + */ + public static Schema parseSingle(String schema) throws SchemaParseException { + return new SchemaParser().parse(schema).mainSchema(); + } + + /** + *

    + * Use a default SchemaParser to parse a single schema from a file. Equivalent + * to: + *

    + * + * new SchemaParser().parse(schemaFile).mainSchema() + * + * + * @param schemaFile the formatted schema to parse + * @return the parsed schema + * @throws SchemaParseException when the schema is invalid + * @see SchemaParser#parse(Path) + */ + public static Schema parseSingle(Path schemaFile) throws SchemaParseException, IOException { + return new SchemaParser().parse(schemaFile).mainSchema(); + } + + /** + * Create a schema parser that validates names using + * {@link NameValidator#UTF_VALIDATOR}. Initially, the list of known (named) + * schemata is empty. + */ + public SchemaParser() { + this(NameValidator.UTF_VALIDATOR); + } + + /** + * Create a schema parser with the specified name validator. + *

    + * Initially, the list of known (named) schemata is empty. + *

    + *

    + * Note: using {@link NameValidator#STRICT_VALIDATOR} to validate names is + * advised for maximum interoperability. + *

    + * + * @param nameValidator the name validator to use + */ + public SchemaParser(NameValidator nameValidator) { + NameValidator validator = nameValidator != null ? nameValidator : NameValidator.NO_VALIDATION; + this.parseContext = new ParseContext(validator); + this.formattedSchemaParsers = new ArrayList<>(); + for (FormattedSchemaParser formattedSchemaParser : ServiceLoader.load(FormattedSchemaParser.class)) { + formattedSchemaParsers.add(formattedSchemaParser); + } + // Add the default JSON parser last (it is not registered as a service, even + // though it implements the service interface), to allow implementations that + // parse JSON files into schemata differently. + formattedSchemaParsers.add(new JsonSchemaParser()); + } + + /** + * Parse an Avro schema from a file. The file content is assumed to be UTF text. + * + * @param file the file to read + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + * @see UtfTextUtils UTF detection algorithm in UtfTextUtils + */ + public ParseResult parse(File file) throws IOException, SchemaParseException { + return parse(file, null); + } + + /** + * Parse an Avro schema from a file written with a specific character set. + * + * @param file the file to read + * @param charset the character set of the file contents + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(File file, Charset charset) throws IOException, SchemaParseException { + return parse(file.toPath(), charset); + } + + /** + * Parse an Avro schema from a file. The file content is assumed to be UTF text. + * + * @param file the file to read + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + * @see UtfTextUtils UTF detection algorithm in UtfTextUtils + */ + public ParseResult parse(Path file) throws IOException, SchemaParseException { + return parse(file, null); + } + + /** + * Parse an Avro schema from a file written with a specific character set. + * + * @param file the file to read + * @param charset the character set of the file contents + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(Path file, Charset charset) throws IOException, SchemaParseException { + URI inputDir = file.getParent().toUri(); + try (InputStream stream = Files.newInputStream(file)) { + String formattedSchema = UtfTextUtils.readAllBytes(stream, charset); + return parse(inputDir, formattedSchema); + } + } + + /** + * Parse an Avro schema from a file written with a specific character set. + * + * @param location the location of the schema resource + * @param charset the character set of the schema resource + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(URI location, Charset charset) throws IOException, SchemaParseException { + try (InputStream stream = location.toURL().openStream()) { + String formattedSchema = UtfTextUtils.readAllBytes(stream, charset); + return parse(location, formattedSchema); + } + } + + /** + * Parse an Avro schema from an input stream. The stream content is assumed to + * be UTF text. Note that the stream stays open after reading. + * + * @param in the stream to read + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + * @see UtfTextUtils UTF detection algorithm in UtfTextUtils + */ + public ParseResult parse(InputStream in) throws IOException, SchemaParseException { + return parse(in, null); + } + + /** + * Parse an Avro schema from an input stream. Note that the stream stays open + * after reading. + * + * @param in the stream to read + * @param charset the character set of the stream contents + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(InputStream in, Charset charset) throws IOException, SchemaParseException { + return parse(UtfTextUtils.readAllBytes(in, charset)); + } + + /** + * Parse an Avro schema from an input reader. + * + * @param in the stream to read + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(Reader in) throws IOException, SchemaParseException { + return parse(UtfTextUtils.readAllChars(in)); + } + + /** + * Parse an Avro schema from a string. + * + * @param text the text to parse + * @return the schema + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(CharSequence text) throws SchemaParseException { + try { + return parse(null, text); + } catch (IOException e) { + // This can only happen if parser implementations try to read other (related) + // schemata from somewhere. + throw new AvroRuntimeException("Could not read schema", e); + } + } + + /** + * Parse the given schema (string) within the specified context using all + * available {@link FormattedSchemaParser} implementations, collecting any + * {@link SchemaParseException}s that occur, and return the first successfully + * parsed schema. If all parsers fail, throw a {@code SchemaParseException} with + * all collected parse exceptions added as suppressed exceptions. Uses the base + * location of the schema (e.g., the directory where the schema file lives) if + * available. + * + * @param baseUri the base location of the schema, or {@code null} if + * not known + * @param formattedSchema the schema as text + * @return the parsed schema + * @throws IOException if thrown by one of the parsers + * @throws RuntimeException if thrown by one of the parsers + * @throws SchemaParseException when all parsers fail + */ + private ParseResult parse(URI baseUri, CharSequence formattedSchema) throws IOException, SchemaParseException { + List parseExceptions = new ArrayList<>(); + for (FormattedSchemaParser formattedSchemaParser : formattedSchemaParsers) { + try { + Schema schema = formattedSchemaParser.parse(parseContext, baseUri, formattedSchema); + if (parseContext.hasNewSchemas() || schema != null) { + // Parsing succeeded: return the result. + return parseContext.commit(schema); + } + } catch (SchemaParseException e) { + parseContext.rollback(); + parseExceptions.add(e); + } + } + + // None of the available parsers succeeded + + if (parseExceptions.size() == 1) { + throw parseExceptions.get(0); + } + SchemaParseException parseException = new SchemaParseException( + "Could not parse the schema (the suppressed exceptions tell why)."); + parseExceptions.forEach(parseException::addSuppressed); + throw parseException; + } + + /** + * Get all parsed schemata. + * + * @return all parsed schemas, in the order they were parsed + */ + public List getParsedNamedSchemas() { + return parseContext.resolveAllSchemas(); + } + + // Temporary method to reduce PR size + @Deprecated + public Schema resolve(ParseResult result) { + return result.mainSchema(); + } + + public interface ParseResult { + /** + * The main schema parsed from a file. Can be any schema, or {@code null} if the + * parsed file has no "main" schema. + */ + Schema mainSchema(); + + /** + * The list of named schemata that were parsed. + */ + List parsedNamedSchemas(); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java b/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java new file mode 100644 index 00000000000..886a939735c --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java @@ -0,0 +1,298 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Thrown to prevent making large allocations when reading potentially + * pathological input data from an untrusted source. + *

    + * The following system properties can be set to limit the size of bytes, + * strings and collection types to be allocated: + *

      + *
    • org.apache.avro.limits.byte.maxLength
    • limits the maximum + * size of byte types. + *
    • org.apache.avro.limits.collectionItems.maxLength
    • limits the + * maximum number of map and list items that can be read at + * once single sequence. + *
    • org.apache.avro.limits.string.maxLength
    • limits the maximum + * size of string types. + *
    + * + * The default is to permit sizes up to {@link #MAX_ARRAY_VM_LIMIT}. + */ +public class SystemLimitException extends AvroRuntimeException { + + /** + * The maximum length of array to allocate (unless necessary). Some VMs reserve + * some header words in an array. Attempts to allocate larger arrays may result + * in {@code OutOfMemoryError: Requested array size exceeds VM limit} + * + * @see JDK-8246725 + */ + // VisibleForTesting + static final int MAX_ARRAY_VM_LIMIT = Integer.MAX_VALUE - 8; + + public static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength"; + public static final String MAX_COLLECTION_LENGTH_PROPERTY = "org.apache.avro.limits.collectionItems.maxLength"; + public static final String MAX_STRING_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength"; + + private static int maxBytesLength = MAX_ARRAY_VM_LIMIT; + private static int maxCollectionLength = MAX_ARRAY_VM_LIMIT; + private static int maxStringLength = MAX_ARRAY_VM_LIMIT; + + private static final Logger LOG = LoggerFactory.getLogger(SystemLimitException.class); + + /** + * System property declaring max size of any decompression stream: {@value}. + */ + public static final String MAX_DECOMPRESS_LENGTH_PROPERTY = "org.apache.avro.limits.decompress.maxLength"; + + /** + * Default limit when it is lower than the heap-aware limit: {@value}. + */ + private static final long DEFAULT_MAX_DECOMPRESS_LENGTH = 200L * 1024 * 1024; + + /** + * Keep the default decompression limit below the maximum heap to avoid allowing + * a single block to exhaust constrained JVMs: {@value}. + */ + private static final long DEFAULT_MAX_DECOMPRESS_HEAP_FRACTION = 4; + + /** + * Calculated max decompress length. + */ + public static final long MAX_DECOMPRESS_LENGTH = getLongLimitFromProperty(MAX_DECOMPRESS_LENGTH_PROPERTY, + defaultMaxDecompressLength()); + + static { + resetLimits(); + } + + public SystemLimitException(String message) { + super(message); + } + + /** + * Get an integer value stored in a system property, used to configure the + * system behaviour of decoders + * + * @param property The system property to fetch + * @param defaultValue The value to use if the system property is not present or + * parsable as an int + * @return The value from the system property + */ + private static int getLimitFromProperty(String property, int defaultValue) { + String o = System.getProperty(property); + int i = defaultValue; + if (o != null) { + try { + i = Integer.parseUnsignedInt(o); + } catch (NumberFormatException nfe) { + LoggerFactory.getLogger(SystemLimitException.class).warn("Could not parse property " + property + ": " + o, + nfe); + } + } + return i; + } + + /** + * Get a long value stored in a system property, used to configure the system + * behaviour of output. + * + * @param property The system property to fetch + * @param defaultValue The value to use if the system property is not present or + * parsable as a long + * @return The value from the system property + */ + private static long getLongLimitFromProperty(String property, long defaultValue) { + String prop = System.getProperty(property); + long limit = defaultValue; + if (prop != null) { + try { + long parsed = Long.parseLong(prop); + if (parsed <= 0) { + LOG.warn("Invalid value '{}' for property '{}': must be positive. Using default: {}", prop, property, + defaultValue); + } else { + limit = parsed; + } + } catch (NumberFormatException e) { + LOG.warn("Could not parse property '{}' value '{}'. Using default: {}", property, prop, defaultValue); + } + } + return limit; + } + + /** + * Calculate a max decompression length as a fraction of the maximum memory of + * the runtime. + * + * @return the calculated max default decompression length. + */ + private static long defaultMaxDecompressLength() { + return Math.min(DEFAULT_MAX_DECOMPRESS_LENGTH, + Math.max(1L, Runtime.getRuntime().maxMemory() / DEFAULT_MAX_DECOMPRESS_HEAP_FRACTION)); + } + + /** + * Check to ensure that reading the bytes is within the specified limits. + * + * @param length The proposed size of the bytes to read + * @return The size of the bytes if and only if it is within the limit and + * non-negative. + * @throws UnsupportedOperationException if reading the datum would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxBytesLength(long length) { + if (length < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + length); + } + if (length > MAX_ARRAY_VM_LIMIT) { + throw new UnsupportedOperationException( + "Cannot read arrays longer than " + MAX_ARRAY_VM_LIMIT + " bytes in Java library"); + } + if (length > maxBytesLength) { + throw new SystemLimitException("Bytes length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** + * Check to ensure that reading the specified number of items remains within the + * specified limits. + * + * @param existing The number of elements items read in the collection + * @param items The next number of items to read. In normal usage, this is + * always a positive, permitted value. Negative and zero values + * have a special meaning in Avro decoding. + * @return The total number of items in the collection if and only if it is + * within the limit and non-negative. + * @throws UnsupportedOperationException if reading the items would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxCollectionLength(long existing, long items) { + long length = existing + items; + if (existing < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + existing); + } + if (items < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + items); + } + if (length > MAX_ARRAY_VM_LIMIT || length < existing) { + throw new UnsupportedOperationException( + "Cannot read collections larger than " + MAX_ARRAY_VM_LIMIT + " items in Java library"); + } + if (length > maxCollectionLength) { + throw new SystemLimitException("Collection length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** + * Check to ensure that reading the specified number of items remains within the + * specified limits. + * + * @param items The next number of items to read. In normal usage, this is + * always a positive, permitted value. Negative and zero values + * have a special meaning in Avro decoding. + * @return The total number of items in the collection if and only if it is + * within the limit and non-negative. + * @throws UnsupportedOperationException if reading the items would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxCollectionLength(long items) { + if (items > MAX_ARRAY_VM_LIMIT) { + throw new UnsupportedOperationException( + "Cannot read collections larger than " + MAX_ARRAY_VM_LIMIT + " items in Java library"); + } + if (items > maxCollectionLength) { + throw new SystemLimitException( + "Collection length " + items + " exceeds the maximum allowed of " + maxCollectionLength); + } + return (int) items; + } + + /** + * Check to ensure that reading the string size is within the specified limits. + * + * @param length The proposed size of the string to read + * @return The size of the string if and only if it is within the limit and + * non-negative. + * @throws UnsupportedOperationException if reading the items would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxStringLength(long length) { + if (length < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + length); + } + if (length > MAX_ARRAY_VM_LIMIT) { + throw new UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_VM_LIMIT + " bytes"); + } + if (length > maxStringLength) { + throw new SystemLimitException("String length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** + * Check there is capacity to write data to a stream. + * + * @param limit total capacity limit + * @param streamLength current stream size + * @param bytes bytes to add to the stream + * @throws SystemLimitException if the limit is exceeded. + */ + public static void checkMaxDecompressCapacity(long limit, long streamLength, int bytes) { + if (streamLength + bytes > limit) { + throw new SystemLimitException( + String.format("Buffer size %,d (bytes) exceeds maximum allowed size %,d.", (streamLength + bytes), limit)); + } + } + + /** Reread the limits from the system properties. */ + // VisibleForTesting + static void resetLimits() { + maxBytesLength = getLimitFromProperty(MAX_BYTES_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + maxCollectionLength = getLimitFromProperty(MAX_COLLECTION_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + maxStringLength = getLimitFromProperty(MAX_STRING_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/UnresolvedUnionException.java b/lang/java/avro/src/main/java/org/apache/avro/UnresolvedUnionException.java index 8f25c2cc9a8..1e11a4967af 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/UnresolvedUnionException.java +++ b/lang/java/avro/src/main/java/org/apache/avro/UnresolvedUnionException.java @@ -24,13 +24,14 @@ public class UnresolvedUnionException extends AvroRuntimeException { private Schema unionSchema; public UnresolvedUnionException(Schema unionSchema, Object unresolvedDatum) { - super("Not in union " + unionSchema + ": " + unresolvedDatum); + super("Not in union " + unionSchema + ": " + datumTypeDescription(unresolvedDatum)); this.unionSchema = unionSchema; this.unresolvedDatum = unresolvedDatum; } public UnresolvedUnionException(Schema unionSchema, Schema.Field field, Object unresolvedDatum) { - super("Not in union " + unionSchema + ": " + unresolvedDatum + " (field=" + field.name() + ")"); + super( + "Not in union " + unionSchema + ": " + datumTypeDescription(unresolvedDatum) + " (field=" + field.name() + ")"); this.unionSchema = unionSchema; this.unresolvedDatum = unresolvedDatum; } @@ -42,4 +43,8 @@ public Object getUnresolvedDatum() { public Schema getUnionSchema() { return unionSchema; } + + private static String datumTypeDescription(Object datum) { + return datum == null ? "null" : datum.getClass().getName(); + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/data/Json.java b/lang/java/avro/src/main/java/org/apache/avro/data/Json.java index ca73cc32870..03948107427 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/data/Json.java +++ b/lang/java/avro/src/main/java/org/apache/avro/data/Json.java @@ -19,8 +19,10 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.Iterator; +import org.apache.avro.JsonSchemaParser; import org.apache.avro.util.internal.JacksonUtils; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.JsonNode; @@ -43,6 +45,8 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.ResolvingDecoder; +import static java.util.Objects.requireNonNull; + /** Utilities for reading and writing arbitrary Json data in Avro format. */ public class Json { private Json() { @@ -53,10 +57,14 @@ private Json() { /** The schema for Json data. */ public static final Schema SCHEMA; + + private static final String JSON_AVSC_NOT_FOUND = "Packaged Avro JSON schema not found"; + static { try { try (InputStream in = Json.class.getResourceAsStream("/org/apache/avro/data/Json.avsc")) { - SCHEMA = new Schema.Parser().parse(in); + String schema = new String(requireNonNull(in, JSON_AVSC_NOT_FOUND).readAllBytes(), StandardCharsets.UTF_8); + SCHEMA = JsonSchemaParser.parseInternal(schema); } } catch (IOException e) { throw new AvroRuntimeException(e); diff --git a/lang/java/avro/src/main/java/org/apache/avro/data/TimeConversions.java b/lang/java/avro/src/main/java/org/apache/avro/data/TimeConversions.java index 785d31a5116..e63ebaae6e0 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/data/TimeConversions.java +++ b/lang/java/avro/src/main/java/org/apache/avro/data/TimeConversions.java @@ -204,6 +204,53 @@ public Schema getRecommendedSchema() { } } + public static class TimestampNanosConversion extends Conversion { + @Override + public Class getConvertedType() { + return Instant.class; + } + + @Override + public String getLogicalTypeName() { + return "timestamp-nanos"; + } + + @Override + public String adjustAndSetValue(String varName, String valParamName) { + return varName + " = " + valParamName + ".truncatedTo(java.time.temporal.ChronoUnit.NANOS);"; + } + + @Override + public Instant fromLong(Long microsFromEpoch, Schema schema, LogicalType type) { + long epochSeconds = microsFromEpoch / 1_000_000_000L; + long nanoAdjustment = microsFromEpoch % 1_000_000_000L; + + return Instant.ofEpochSecond(epochSeconds, nanoAdjustment); + } + + @Override + public Long toLong(Instant instant, Schema schema, LogicalType type) { + long seconds = instant.getEpochSecond(); + int nanos = instant.getNano(); + + if (seconds < 0 && nanos > 0) { + long micros = Math.multiplyExact(seconds + 1, 1_000_000_000L); + long adjustment = nanos - 1_000_000; + + return Math.addExact(micros, adjustment); + } else { + long micros = Math.multiplyExact(seconds, 1_000_000_000L); + + return Math.addExact(micros, nanos); + } + } + + @Override + public Schema getRecommendedSchema() { + return LogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)); + } + } + public static class LocalTimestampMillisConversion extends Conversion { private final TimestampMillisConversion timestampMillisConversion = new TimestampMillisConversion(); @@ -265,4 +312,35 @@ public Schema getRecommendedSchema() { return LogicalTypes.localTimestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); } } + + public static class LocalTimestampNanosConversion extends Conversion { + private final TimestampNanosConversion timestampNanosConversion = new TimestampNanosConversion(); + + @Override + public Class getConvertedType() { + return LocalDateTime.class; + } + + @Override + public String getLogicalTypeName() { + return "local-timestamp-nanos"; + } + + @Override + public LocalDateTime fromLong(Long microsFromEpoch, Schema schema, LogicalType type) { + Instant instant = timestampNanosConversion.fromLong(microsFromEpoch, schema, type); + return LocalDateTime.ofInstant(instant, ZoneOffset.UTC); + } + + @Override + public Long toLong(LocalDateTime timestamp, Schema schema, LogicalType type) { + Instant instant = timestamp.toInstant(ZoneOffset.UTC); + return timestampNanosConversion.toLong(instant, schema, type); + } + + @Override + public Schema getRecommendedSchema() { + return LogicalTypes.localTimestampNanos().addToSchema(Schema.create(Schema.Type.LONG)); + } + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/BZip2Codec.java b/lang/java/avro/src/main/java/org/apache/avro/file/BZip2Codec.java index fe90557fa2e..dcdc8590687 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/BZip2Codec.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/BZip2Codec.java @@ -25,6 +25,8 @@ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import static org.apache.avro.util.NonCopyingByteArrayOutputStream.capacityLimitedOutputStream; + /** * Implements bzip2 compression and decompression. */ public class BZip2Codec extends Codec { @@ -60,11 +62,11 @@ public ByteBuffer decompress(ByteBuffer compressedData) throws IOException { compressedData.remaining()); @SuppressWarnings("resource") - NonCopyingByteArrayOutputStream baos = new NonCopyingByteArrayOutputStream(DEFAULT_BUFFER_SIZE); + NonCopyingByteArrayOutputStream baos = capacityLimitedOutputStream(DEFAULT_BUFFER_SIZE); try (BZip2CompressorInputStream inputStream = new BZip2CompressorInputStream(bais)) { - int readCount = -1; + int readCount; while ((readCount = inputStream.read(buffer, compressedData.position(), buffer.length)) > 0) { baos.write(buffer, 0, readCount); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java b/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java index 351c036b861..1cfed238f7e 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java @@ -28,12 +28,14 @@ /** * Encapsulates the ability to specify and configure a compression codec. * - * Currently there are three codecs registered by default: + * Currently there are five codecs registered by default: *
      *
    • {@code null}
    • *
    • {@code deflate}
    • *
    • {@code snappy}
    • *
    • {@code bzip2}
    • + *
    • {@code xz}
    • + *
    • {@code zstandard}
    • *
    * * New and custom codecs can be registered using diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileConstants.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileConstants.java index fe269ca06b2..4664f5410df 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileConstants.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileConstants.java @@ -27,7 +27,6 @@ private DataFileConstants() { public static final byte VERSION = 1; public static final byte[] MAGIC = new byte[] { (byte) 'O', (byte) 'b', (byte) 'j', VERSION }; - public static final long FOOTER_BLOCK = -1; public static final int SYNC_SIZE = 16; public static final int DEFAULT_SYNC_INTERVAL = 4000 * SYNC_SIZE; diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java index 7a235352e50..ae33df59fbe 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java @@ -17,18 +17,19 @@ */ package org.apache.avro.file; +import org.apache.avro.InvalidAvroMagicException; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DecoderFactory; +import org.apache.commons.io.IOUtils; + import java.io.EOFException; +import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.File; import java.util.Arrays; -import org.apache.avro.InvalidAvroMagicException; -import org.apache.avro.io.DecoderFactory; -import org.apache.commons.compress.utils.IOUtils; -import org.apache.avro.io.DatumReader; -import static org.apache.avro.file.DataFileConstants.SYNC_SIZE; import static org.apache.avro.file.DataFileConstants.MAGIC; +import static org.apache.avro.file.DataFileConstants.SYNC_SIZE; /** * Random access to files written with {@link DataFileWriter}. @@ -36,7 +37,7 @@ * @see DataFileWriter */ public class DataFileReader extends DataFileStream implements FileReader { - private SeekableInputStream sin; + private final SeekableInputStream sin; private long blockStart; private int[] partialMatchTable; @@ -69,10 +70,9 @@ public static FileReader openReader(SeekableInput in, DatumReader read length -= bytesRead; offset += bytesRead; } - in.seek(0); if (Arrays.equals(MAGIC, magic)) // current format - return new DataFileReader<>(in, reader); + return new DataFileReader<>(in, reader, magic); if (Arrays.equals(DataFileReader12.MAGIC, magic)) // 1.2 format return new DataFileReader12<>(in, reader); @@ -111,7 +111,7 @@ public static DataFileReader openReader(SeekableInput in, DatumReader *
        */
       public DataFileReader(File file, DatumReader reader) throws IOException {
    -    this(new SeekableFileInput(file), reader, true);
    +    this(new SeekableFileInput(file), reader, true, null);
       }
     
       /**
    @@ -128,15 +128,20 @@ public DataFileReader(File file, DatumReader reader) throws IOException {
        * 
        */
       public DataFileReader(SeekableInput sin, DatumReader reader) throws IOException {
    -    this(sin, reader, false);
    +    this(sin, reader, false, null);
    +  }
    +
    +  private DataFileReader(SeekableInput sin, DatumReader reader, byte[] magic) throws IOException {
    +    this(sin, reader, false, magic);
       }
     
       /** Construct a reader for a file. Please close resource files yourself. */
    -  protected DataFileReader(SeekableInput sin, DatumReader reader, boolean closeOnError) throws IOException {
    +  protected DataFileReader(SeekableInput sin, DatumReader reader, boolean closeOnError, byte[] magic)
    +      throws IOException {
         super(reader);
         try {
           this.sin = new SeekableInputStream(sin);
    -      initialize(this.sin);
    +      initialize(this.sin, magic);
           blockFinished();
         } catch (final Throwable e) {
           if (closeOnError) {
    @@ -153,7 +158,7 @@ protected DataFileReader(SeekableInput sin, DatumReader reader, boolean close
       protected DataFileReader(SeekableInput sin, DatumReader reader, Header header) throws IOException {
         super(reader);
         this.sin = new SeekableInputStream(sin);
    -    initialize(this.sin, header);
    +    initialize(header);
       }
     
       /**
    @@ -166,7 +171,7 @@ public void seek(long position) throws IOException {
         vin = DecoderFactory.get().binaryDecoder(this.sin, vin);
         datumIn = null;
         blockRemaining = 0;
    -    blockStart = position;
    +    blockFinished();
       }
     
       /**
    @@ -180,7 +185,7 @@ public void sync(final long position) throws IOException {
         seek(position);
         // work around an issue where 1.5.4 C stored sync in metadata
         if ((position == 0L) && (getMeta("avro.sync") != null)) {
    -      initialize(sin); // re-init to skip header
    +      initialize(sin, null); // re-init to skip header
           return;
         }
     
    @@ -259,9 +264,9 @@ public long tell() throws IOException {
     
       static class SeekableInputStream extends InputStream implements SeekableInput {
         private final byte[] oneByte = new byte[1];
    -    private SeekableInput in;
    +    private final SeekableInput in;
     
    -    SeekableInputStream(SeekableInput in) throws IOException {
    +    SeekableInputStream(SeekableInput in) {
           this.in = in;
         }
     
    @@ -305,15 +310,10 @@ public int read() throws IOException {
         @Override
         public long skip(long skip) throws IOException {
           long position = in.tell();
    +      long skipToPosition = position + skip;
           long length = in.length();
    -      long remaining = length - position;
    -      if (remaining > skip) {
    -        in.seek(skip);
    -        return in.tell() - position;
    -      } else {
    -        in.seek(remaining);
    -        return in.tell() - position;
    -      }
    +      in.seek(Math.min(skipToPosition, length));
    +      return in.tell() - position;
         }
     
         @Override
    @@ -325,7 +325,7 @@ public void close() throws IOException {
         @Override
         public int available() throws IOException {
           long remaining = (in.length() - in.tell());
    -      return (remaining > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int) remaining;
    +      return (int) Math.min(remaining, Integer.MAX_VALUE);
         }
       }
     }
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
    index f24a978d6f8..0c0a670ea40 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
    @@ -27,6 +27,7 @@
     import java.util.Map;
     
     import org.apache.avro.InvalidAvroMagicException;
    +import org.apache.avro.JsonSchemaParser;
     import org.apache.avro.Schema;
     import org.apache.avro.UnknownAvroCodecException;
     import org.apache.avro.io.DatumReader;
    @@ -61,6 +62,7 @@ public DataFileReader12(SeekableInput sin, DatumReader reader) throws IOExcep
         this.in = new DataFileReader.SeekableInputStream(sin);
     
         byte[] magic = new byte[4];
    +    in.seek(0); // seek to 0 to read magic header
         in.read(magic);
         if (!Arrays.equals(MAGIC, magic))
           throw new InvalidAvroMagicException("Not a data file.");
    @@ -87,7 +89,7 @@ public DataFileReader12(SeekableInput sin, DatumReader reader) throws IOExcep
         if (codec != null && !codec.equals(NULL_CODEC)) {
           throw new UnknownAvroCodecException("Unknown codec: " + codec);
         }
    -    this.schema = new Schema.Parser().parse(getMetaString(SCHEMA));
    +    this.schema = JsonSchemaParser.parseInternal(getMetaString(SCHEMA));
         this.reader = reader;
     
         reader.setSchema(schema);
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
    index 8d2697104e2..f93ad8e5fdd 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
    @@ -17,14 +17,23 @@
      */
     package org.apache.avro.file;
     
    +import org.apache.avro.AvroRuntimeException;
    +import org.apache.avro.InvalidAvroMagicException;
    +import org.apache.avro.JsonSchemaParser;
    +import org.apache.avro.Schema;
    +import org.apache.avro.io.BinaryDecoder;
    +import org.apache.avro.io.BinaryEncoder;
    +import org.apache.avro.io.DatumReader;
    +import org.apache.avro.io.DecoderFactory;
    +
    +import java.io.Closeable;
     import java.io.EOFException;
     import java.io.IOException;
     import java.io.InputStream;
    -import java.io.Closeable;
     import java.nio.ByteBuffer;
     import java.nio.charset.StandardCharsets;
    -import java.util.Arrays;
     import java.util.ArrayList;
    +import java.util.Arrays;
     import java.util.Collections;
     import java.util.HashMap;
     import java.util.Iterator;
    @@ -32,14 +41,6 @@
     import java.util.Map;
     import java.util.NoSuchElementException;
     
    -import org.apache.avro.AvroRuntimeException;
    -import org.apache.avro.InvalidAvroMagicException;
    -import org.apache.avro.Schema;
    -import org.apache.avro.io.BinaryEncoder;
    -import org.apache.avro.io.DecoderFactory;
    -import org.apache.avro.io.BinaryDecoder;
    -import org.apache.avro.io.DatumReader;
    -
     /**
      * Streaming access to files written by {@link DataFileWriter}. Use
      * {@link DataFileReader} for file-based input.
    @@ -62,7 +63,7 @@ private Header() {
         }
       }
     
    -  private DatumReader reader;
    +  private final DatumReader reader;
       private long blockSize;
       private boolean availableBlock = false;
       private Header header;
    @@ -70,7 +71,8 @@ private Header() {
       /** Decoder on raw input stream. (Used for metadata.) */
       BinaryDecoder vin;
       /**
    -   * Secondary decoder, for datums. (Different than vin for block segments.)
    +   * Secondary decoder, for datums. (Different from `vin`, which is used for block
    +   * segments)
        */
       BinaryDecoder datumIn = null;
     
    @@ -87,28 +89,40 @@ private Header() {
        */
       public DataFileStream(InputStream in, DatumReader reader) throws IOException {
         this.reader = reader;
    -    initialize(in);
    +    initialize(in, null);
       }
     
       /**
        * create an uninitialized DataFileStream
        */
    -  protected DataFileStream(DatumReader reader) throws IOException {
    +  protected DataFileStream(DatumReader reader) {
         this.reader = reader;
       }
     
    -  /** Initialize the stream by reading from its head. */
    -  void initialize(InputStream in) throws IOException {
    -    this.header = new Header();
    -    this.vin = DecoderFactory.get().binaryDecoder(in, vin);
    +  byte[] readMagic() throws IOException {
    +    if (this.vin == null) {
    +      throw new IOException("InputStream is not initialized");
    +    }
         byte[] magic = new byte[DataFileConstants.MAGIC.length];
         try {
           vin.readFixed(magic); // read magic
         } catch (IOException e) {
           throw new IOException("Not an Avro data file.", e);
         }
    +    return magic;
    +  }
    +
    +  void validateMagic(byte[] magic) throws InvalidAvroMagicException {
         if (!Arrays.equals(DataFileConstants.MAGIC, magic))
           throw new InvalidAvroMagicException("Not an Avro data file.");
    +  }
    +
    +  /** Initialize the stream by reading from its head. */
    +  void initialize(InputStream in, byte[] magic) throws IOException {
    +    this.header = new Header();
    +    this.vin = DecoderFactory.get().binaryDecoder(in, vin);
    +    magic = (magic == null) ? readMagic() : magic;
    +    validateMagic(magic);
     
         long l = vin.readMapStart(); // read meta data
         if (l > 0) {
    @@ -127,14 +141,13 @@ void initialize(InputStream in) throws IOException {
     
         // finalize the header
         header.metaKeyList = Collections.unmodifiableList(header.metaKeyList);
    -    header.schema = new Schema.Parser().setValidate(false).setValidateDefaults(false)
    -        .parse(getMetaString(DataFileConstants.SCHEMA));
    +    header.schema = JsonSchemaParser.parseInternal(getMetaString(DataFileConstants.SCHEMA));
         this.codec = resolveCodec();
         reader.setSchema(header.schema);
       }
     
       /** Initialize the stream without reading from it. */
    -  void initialize(InputStream in, Header header) throws IOException {
    +  void initialize(Header header) {
         this.header = header;
         this.codec = resolveCodec();
         reader.setSchema(header.schema);
    @@ -262,6 +275,7 @@ public ByteBuffer nextBlock() throws IOException {
         if (blockRemaining != blockCount)
           throw new IllegalStateException("Not at block start.");
         blockRemaining = 0;
    +    blockFinished();
         datumIn = null;
         return blockBuffer;
       }
    @@ -289,7 +303,7 @@ boolean hasNextBlock() {
           blockRemaining = vin.readLong(); // read block count
           blockSize = vin.readLong(); // read block size
           if (blockSize > Integer.MAX_VALUE || blockSize < 0) {
    -        throw new IOException("Block size invalid or too large for this " + "implementation: " + blockSize);
    +        throw new IOException("Block size invalid or too large for this implementation: " + blockSize);
           }
           blockCount = blockRemaining;
           availableBlock = true;
    @@ -316,7 +330,10 @@ DataBlock nextRawBlock(DataBlock reuse) throws IOException {
         vin.readFixed(syncBuffer);
         availableBlock = false;
         if (!Arrays.equals(syncBuffer, header.sync))
    -      throw new IOException("Invalid sync!");
    +      throw new IOException("Invalid sync marker! The sync marker in the data block doesn't match the "
    +          + "file header's sync marker. This likely indicates data corruption, truncated file, "
    +          + "or incorrectly concatenated Avro files. Verify file integrity and ensure proper "
    +          + "file transmission or creation.");
         return reuse;
       }
     
    @@ -352,22 +369,6 @@ private DataBlock(long numEntries, int blockSize) {
           this.numEntries = numEntries;
         }
     
    -    byte[] getData() {
    -      return data;
    -    }
    -
    -    long getNumEntries() {
    -      return numEntries;
    -    }
    -
    -    int getBlockSize() {
    -      return blockSize;
    -    }
    -
    -    boolean isFlushOnWrite() {
    -      return flushOnWrite;
    -    }
    -
         void setFlushOnWrite(boolean flushOnWrite) {
           this.flushOnWrite = flushOnWrite;
         }
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
    index 05e5006acbf..466fa28135c 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
    @@ -17,32 +17,32 @@
      */
     package org.apache.avro.file;
     
    -import static java.nio.charset.StandardCharsets.UTF_8;
    +import org.apache.avro.AvroRuntimeException;
    +import org.apache.avro.Schema;
    +import org.apache.avro.file.DataFileStream.DataBlock;
    +import org.apache.avro.generic.GenericDatumReader;
    +import org.apache.avro.io.BinaryEncoder;
    +import org.apache.avro.io.DatumWriter;
    +import org.apache.avro.io.EncoderFactory;
    +import org.apache.avro.util.NonCopyingByteArrayOutputStream;
    +import org.apache.commons.io.IOUtils;
     
     import java.io.BufferedOutputStream;
     import java.io.Closeable;
     import java.io.File;
    +import java.io.FileOutputStream;
     import java.io.FilterOutputStream;
     import java.io.Flushable;
     import java.io.IOException;
     import java.io.OutputStream;
     import java.nio.ByteBuffer;
     import java.nio.charset.StandardCharsets;
    -import java.security.MessageDigest;
    -import java.security.NoSuchAlgorithmException;
    +import java.security.SecureRandom;
     import java.util.HashMap;
     import java.util.Map;
    -import java.util.UUID;
    +import java.util.function.Function;
     
    -import org.apache.avro.AvroRuntimeException;
    -import org.apache.avro.Schema;
    -import org.apache.avro.file.DataFileStream.DataBlock;
    -import org.apache.avro.generic.GenericDatumReader;
    -import org.apache.avro.io.BinaryEncoder;
    -import org.apache.avro.io.DatumWriter;
    -import org.apache.avro.io.EncoderFactory;
    -import org.apache.avro.util.NonCopyingByteArrayOutputStream;
    -import org.apache.commons.compress.utils.IOUtils;
    +import static java.nio.charset.StandardCharsets.UTF_8;
     
     /**
      * Stores in a file a sequence of data conforming to a schema. The schema is
    @@ -51,12 +51,12 @@
      * blocks. A synchronization marker is written between blocks, so that
      * files may be split. Blocks may be compressed. Extensible metadata is stored
      * at the end of the file. Files may be appended to.
    - * 
    + *
      * @see DataFileReader
      */
     public class DataFileWriter implements Closeable, Flushable {
       private Schema schema;
    -  private DatumWriter dout;
    +  private final DatumWriter dout;
     
       private OutputStream underlyingStream;
     
    @@ -72,6 +72,8 @@ public class DataFileWriter implements Closeable, Flushable {
     
       private byte[] sync; // 16 random bytes
       private int syncInterval = DataFileConstants.DEFAULT_SYNC_INTERVAL;
    +  private Function initEncoder = out -> new EncoderFactory().directBinaryEncoder(out,
    +      null);
     
       private boolean isOpen;
       private Codec codec;
    @@ -115,11 +117,10 @@ public DataFileWriter setCodec(CodecFactory c) {
        * is written. In this case, the {@linkplain #flush()} must be called to flush
        * the stream.
        *
    -   * Invalid values throw IllegalArgumentException
    -   *
        * @param syncInterval the approximate number of uncompressed bytes to write in
        *                     each block
        * @return this DataFileWriter
    +   * @throws IllegalArgumentException if syncInterval is invalid
        */
       public DataFileWriter setSyncInterval(int syncInterval) {
         if (syncInterval < 32 || syncInterval > (1 << 30)) {
    @@ -129,6 +130,17 @@ public DataFileWriter setSyncInterval(int syncInterval) {
         return this;
       }
     
    +  /**
    +   * Allows setting a different encoder than the default DirectBinaryEncoder.
    +   *
    +   * @param initEncoderFunc Function to create a binary encoder
    +   * @return this DataFileWriter
    +   */
    +  public DataFileWriter setEncoder(Function initEncoderFunc) {
    +    this.initEncoder = initEncoderFunc;
    +    return this;
    +  }
    +
       /** Open a new file for data matching a schema with a random sync. */
       public DataFileWriter create(Schema schema, File file) throws IOException {
         SyncableFileOutputStream sfos = new SyncableFileOutputStream(file);
    @@ -180,8 +192,8 @@ public DataFileWriter create(Schema schema, OutputStream outs, byte[] sync) t
        * Set whether this writer should flush the block to the stream every time a
        * sync marker is written. By default, the writer will flush the buffer each
        * time a sync marker is written (if the block size limit is reached or the
    -   * {@linkplain #sync()} is called.
    -   * 
    +   * {@linkplain #sync()} is called).
    +   *
        * @param flushOnEveryBlock - If set to false, this writer will not flush the
        *                          block to the stream until {@linkplain #flush()} is
        *                          explicitly called.
    @@ -211,7 +223,7 @@ public DataFileWriter appendTo(File file) throws IOException {
       /**
        * Open a writer appending to an existing file. Since 1.9.0 this method
        * does not close in.
    -   * 
    +   *
        * @param in  reading the existing file.
        * @param out positioned at the end of the existing file.
        */
    @@ -234,29 +246,40 @@ public DataFileWriter appendTo(SeekableInput in, OutputStream out) throws IOE
         return this;
       }
     
    -  private void init(OutputStream outs) throws IOException {
    +  private void init(OutputStream outs) {
         this.underlyingStream = outs;
    -    this.out = new BufferedFileOutputStream(outs);
    +    // Size the output buffer to fit an entire block frame in a single flush:
    +    // maxBlockSize() for compressed data + 20 bytes for two varint-encoded longs
    +    // (up to 10 bytes each) + sync.length for the sync marker
    +    this.out = new BufferedFileOutputStream(outs, maxBlockSize() + 20 + sync.length);
         EncoderFactory efactory = new EncoderFactory();
         this.vout = efactory.directBinaryEncoder(out, null);
         dout.setSchema(schema);
    -    buffer = new NonCopyingByteArrayOutputStream(Math.min((int) (syncInterval * 1.25), Integer.MAX_VALUE / 2 - 1));
    -    this.bufOut = efactory.directBinaryEncoder(buffer, null);
    +    buffer = new NonCopyingByteArrayOutputStream(maxBlockSize());
    +    this.bufOut = this.initEncoder.apply(buffer);
         if (this.codec == null) {
           this.codec = CodecFactory.nullCodec().createInstance();
         }
         this.isOpen = true;
       }
     
    +  /**
    +   * Returns the estimated maximum compressed block size. Blocks are flushed when
    +   * uncompressed data reaches {@link #syncInterval}, but compression may increase
    +   * size (e.g. uncompressible data with codec framing overhead), so we allow 25%
    +   * headroom. The result is clamped to avoid integer overflow when used for
    +   * buffer allocation.
    +   */
    +  private int maxBlockSize() {
    +    return Math.min((int) (syncInterval * 1.25), Integer.MAX_VALUE / 2 - 1);
    +  }
    +
    +  private static final SecureRandom RNG = new SecureRandom();
    +
       private static byte[] generateSync() {
    -    try {
    -      MessageDigest digester = MessageDigest.getInstance("MD5");
    -      long time = System.currentTimeMillis();
    -      digester.update((UUID.randomUUID() + "@" + time).getBytes(UTF_8));
    -      return digester.digest();
    -    } catch (NoSuchAlgorithmException e) {
    -      throw new RuntimeException(e);
    -    }
    +    byte[] sync = new byte[16];
    +    RNG.nextBytes(sync);
    +    return sync;
       }
     
       private DataFileWriter setMetaInternal(String key, byte[] value) {
    @@ -304,7 +327,7 @@ public AppendWriteException(Exception e) {
     
       /**
        * Append a datum to the file.
    -   * 
    +   *
        * @see AppendWriteException
        */
       public void append(D datum) throws IOException {
    @@ -365,7 +388,7 @@ private void writeIfBlockFull() throws IOException {
        * at compression level 7. If recompress is false, blocks will be copied
        * without changing the compression level. If true, they will be converted to
        * the new compression level.
    -   * 
    +   *
        * @param otherFile
        * @param recompress
        * @throws IOException
    @@ -439,10 +462,10 @@ public void flush() throws IOException {
       }
     
       /**
    -   * If this writer was instantiated using a File or using an
    -   * {@linkplain Syncable} instance, this method flushes all buffers for this
    -   * writer to disk. In other cases, this method behaves exactly like
    -   * {@linkplain #flush()}.
    +   * If this writer was instantiated using a {@linkplain File},
    +   * {@linkplain FileOutputStream} or {@linkplain Syncable} instance, this method
    +   * flushes all buffers for this writer to disk. In other cases, this method
    +   * behaves exactly like {@linkplain #flush()}.
        *
        * @throws IOException
        */
    @@ -450,6 +473,8 @@ public void fSync() throws IOException {
         flush();
         if (underlyingStream instanceof Syncable) {
           ((Syncable) underlyingStream).sync();
    +    } else if (underlyingStream instanceof FileOutputStream) {
    +      ((FileOutputStream) underlyingStream).getFD().sync();
         }
       }
     
    @@ -463,11 +488,11 @@ public void close() throws IOException {
         }
       }
     
    -  private class BufferedFileOutputStream extends BufferedOutputStream {
    +  private static class BufferedFileOutputStream extends BufferedOutputStream {
         private long position; // start of buffer
     
         private class PositionFilter extends FilterOutputStream {
    -      public PositionFilter(OutputStream out) throws IOException {
    +      public PositionFilter(OutputStream out) {
             super(out);
           }
     
    @@ -478,11 +503,16 @@ public void write(byte[] b, int off, int len) throws IOException {
           }
         }
     
    -    public BufferedFileOutputStream(OutputStream out) throws IOException {
    +    public BufferedFileOutputStream(OutputStream out) {
           super(null);
           this.out = new PositionFilter(out);
         }
     
    +    public BufferedFileOutputStream(OutputStream out, int bufferSize) {
    +      super(null, bufferSize);
    +      this.out = new PositionFilter(out);
    +    }
    +
         public long tell() {
           return position + count;
         }
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DeflateCodec.java b/lang/java/avro/src/main/java/org/apache/avro/file/DeflateCodec.java
    index 87498d3ee82..56e3bc929d8 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/DeflateCodec.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DeflateCodec.java
    @@ -27,6 +27,8 @@
     
     import org.apache.avro.util.NonCopyingByteArrayOutputStream;
     
    +import static org.apache.avro.util.NonCopyingByteArrayOutputStream.capacityLimitedOutputStream;
    +
     /**
      * Implements DEFLATE (RFC1951) compression and decompression.
      *
    @@ -40,7 +42,7 @@ public class DeflateCodec extends Codec {
       private static final int DEFAULT_BUFFER_SIZE = 8192;
     
       static class Option extends CodecFactory {
    -    private int compressionLevel;
    +    private final int compressionLevel;
     
         Option(int compressionLevel) {
           this.compressionLevel = compressionLevel;
    @@ -55,8 +57,8 @@ protected Codec createInstance() {
       private Deflater deflater;
       private Inflater inflater;
       // currently only do 'nowrap' -- RFC 1951, not zlib
    -  private boolean nowrap = true;
    -  private int compressionLevel;
    +  private final boolean nowrap = true;
    +  private final int compressionLevel;
     
       public DeflateCodec(int compressionLevel) {
         this.compressionLevel = compressionLevel;
    @@ -78,7 +80,7 @@ public ByteBuffer compress(ByteBuffer data) throws IOException {
     
       @Override
       public ByteBuffer decompress(ByteBuffer data) throws IOException {
    -    NonCopyingByteArrayOutputStream baos = new NonCopyingByteArrayOutputStream(DEFAULT_BUFFER_SIZE);
    +    NonCopyingByteArrayOutputStream baos = capacityLimitedOutputStream(DEFAULT_BUFFER_SIZE);
         try (OutputStream outputStream = new InflaterOutputStream(baos, getInflater())) {
           outputStream.write(data.array(), computeOffset(data), data.remaining());
         }
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/FileReader.java b/lang/java/avro/src/main/java/org/apache/avro/file/FileReader.java
    index 07229d59ee8..9a54cf055ef 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/FileReader.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/FileReader.java
    @@ -31,7 +31,7 @@ public interface FileReader extends Iterator, Iterable, Closeable {
     
       /**
        * Read the next datum from the file.
    -   * 
    +   *
        * @param reuse an instance to reuse.
        * @throws NoSuchElementException if no more remain in the file.
        */
    @@ -39,7 +39,7 @@ public interface FileReader extends Iterator, Iterable, Closeable {
     
       /**
        * Move to the next synchronization point after a position. To process a range
    -   * of file entires, call this with the starting position, then check
    +   * of file entries, call this with the starting position, then check
        * {@link #pastSync(long)} with the end point before each call to
        * {@link #next()}.
        */
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/SeekableByteArrayInput.java b/lang/java/avro/src/main/java/org/apache/avro/file/SeekableByteArrayInput.java
    index 991fc44b4e8..49994a9bc8e 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/SeekableByteArrayInput.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/SeekableByteArrayInput.java
    @@ -18,6 +18,7 @@
     package org.apache.avro.file;
     
     import java.io.ByteArrayInputStream;
    +import java.io.EOFException;
     import java.io.IOException;
     
     /** A {@link SeekableInput} backed with data in a byte array. */
    @@ -34,8 +35,12 @@ public long length() throws IOException {
     
       @Override
       public void seek(long p) throws IOException {
    -    this.reset();
    -    this.skip(p);
    +    if (p >= this.count) {
    +      throw new EOFException();
    +    }
    +    if (p >= 0) {
    +      this.pos = (int) p;
    +    }
       }
     
       @Override
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/SnappyCodec.java b/lang/java/avro/src/main/java/org/apache/avro/file/SnappyCodec.java
    index 72bf0b74822..95886013d6f 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/SnappyCodec.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/SnappyCodec.java
    @@ -24,9 +24,11 @@
     
     import org.xerial.snappy.Snappy;
     
    +import org.apache.avro.SystemLimitException;
    +
     /** * Implements Snappy compression and decompression. */
     public class SnappyCodec extends Codec {
    -  private CRC32 crc32 = new CRC32();
    +  private final CRC32 crc32 = new CRC32();
     
       static class Option extends CodecFactory {
         static {
    @@ -66,13 +68,15 @@ public ByteBuffer compress(ByteBuffer in) throws IOException {
       @Override
       public ByteBuffer decompress(ByteBuffer in) throws IOException {
         int offset = computeOffset(in);
    -    ByteBuffer out = ByteBuffer.allocate(Snappy.uncompressedLength(in.array(), offset, in.remaining() - 4));
    +    final int uncompressedLength = Snappy.uncompressedLength(in.array(), offset, in.remaining() - 4);
    +    SystemLimitException.checkMaxDecompressCapacity(SystemLimitException.MAX_DECOMPRESS_LENGTH, 0, uncompressedLength);
    +    ByteBuffer out = ByteBuffer.allocate(uncompressedLength);
         int size = Snappy.uncompress(in.array(), offset, in.remaining() - 4, out.array(), 0);
         ((Buffer) out).limit(size);
     
         crc32.reset();
         crc32.update(out.array(), 0, size);
    -    if (in.getInt(((Buffer) in).limit() - 4) != (int) crc32.getValue())
    +    if (in.getInt(in.limit() - 4) != (int) crc32.getValue())
           throw new IOException("Checksum failure");
     
         return out;
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/XZCodec.java b/lang/java/avro/src/main/java/org/apache/avro/file/XZCodec.java
    index 3052f2a4160..74ffaec3022 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/XZCodec.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/XZCodec.java
    @@ -26,7 +26,6 @@
     import org.apache.avro.util.NonCopyingByteArrayOutputStream;
     import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
     import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream;
    -import org.apache.commons.compress.utils.IOUtils;
     
     /** * Implements xz compression and decompression. */
     public class XZCodec extends Codec {
    @@ -34,7 +33,7 @@ public class XZCodec extends Codec {
       private static final int DEFAULT_BUFFER_SIZE = 8192;
     
       static class Option extends CodecFactory {
    -    private int compressionLevel;
    +    private final int compressionLevel;
     
         Option(int compressionLevel) {
           this.compressionLevel = compressionLevel;
    @@ -46,7 +45,7 @@ protected Codec createInstance() {
         }
       }
     
    -  private int compressionLevel;
    +  private final int compressionLevel;
     
       public XZCodec(int compressionLevel) {
         this.compressionLevel = compressionLevel;
    @@ -68,11 +67,12 @@ public ByteBuffer compress(ByteBuffer data) throws IOException {
     
       @Override
       public ByteBuffer decompress(ByteBuffer data) throws IOException {
    -    NonCopyingByteArrayOutputStream baos = new NonCopyingByteArrayOutputStream(DEFAULT_BUFFER_SIZE);
    +    NonCopyingByteArrayOutputStream baos = NonCopyingByteArrayOutputStream
    +        .capacityLimitedOutputStream(DEFAULT_BUFFER_SIZE);
         InputStream bytesIn = new ByteArrayInputStream(data.array(), computeOffset(data), data.remaining());
     
         try (InputStream ios = new XZCompressorInputStream(bytesIn)) {
    -      IOUtils.copy(ios, baos);
    +      ios.transferTo(baos);
         }
         return baos.asByteBuffer();
       }
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/ZstandardCodec.java b/lang/java/avro/src/main/java/org/apache/avro/file/ZstandardCodec.java
    index f778b2fe356..ddd8dd6dffe 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/ZstandardCodec.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/ZstandardCodec.java
    @@ -24,7 +24,6 @@
     import java.nio.ByteBuffer;
     
     import org.apache.avro.util.NonCopyingByteArrayOutputStream;
    -import org.apache.commons.compress.utils.IOUtils;
     
     public class ZstandardCodec extends Codec {
       public final static int DEFAULT_COMPRESSION = 3;
    @@ -78,11 +77,12 @@ public ByteBuffer compress(ByteBuffer data) throws IOException {
     
       @Override
       public ByteBuffer decompress(ByteBuffer compressedData) throws IOException {
    -    NonCopyingByteArrayOutputStream baos = new NonCopyingByteArrayOutputStream(DEFAULT_BUFFER_SIZE);
    +    NonCopyingByteArrayOutputStream baos = NonCopyingByteArrayOutputStream
    +        .capacityLimitedOutputStream(DEFAULT_BUFFER_SIZE);
         InputStream bytesIn = new ByteArrayInputStream(compressedData.array(), computeOffset(compressedData),
             compressedData.remaining());
         try (InputStream ios = ZstandardLoader.input(bytesIn, useBufferPool)) {
    -      IOUtils.copy(ios, baos);
    +      ios.transferTo(baos);
         }
         return baos.asByteBuffer();
       }
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
    index 8f7391f5934..0098d8da5e0 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
    @@ -26,14 +26,15 @@
     import java.util.AbstractList;
     import java.util.Arrays;
     import java.util.Collection;
    -import java.util.Collections;
     import java.util.HashMap;
     import java.util.IdentityHashMap;
     import java.util.Iterator;
     import java.util.LinkedHashMap;
     import java.util.List;
     import java.util.Map;
    -import java.util.WeakHashMap;
    +import java.util.ServiceLoader;
    +import java.util.UUID;
    +import java.util.concurrent.ConcurrentMap;
     
     import org.apache.avro.AvroMissingFieldException;
     import org.apache.avro.AvroRuntimeException;
    @@ -56,8 +57,12 @@
     import org.apache.avro.io.FastReaderBuilder;
     import org.apache.avro.util.Utf8;
     import org.apache.avro.util.internal.Accessor;
    +import org.apache.avro.generic.PrimitivesArrays.PrimitiveArray;
     
     import com.fasterxml.jackson.databind.JsonNode;
    +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap;
    +
    +import static org.apache.avro.util.springframework.ConcurrentReferenceHashMap.ReferenceType.WEAK;
     
     /**
      * Utilities for generic Java data. See {@link GenericRecordBuilder} for a
    @@ -114,6 +119,7 @@ public GenericData() {
       /** For subclasses. GenericData does not use a ClassLoader. */
       public GenericData(ClassLoader classLoader) {
         this.classLoader = (classLoader != null) ? classLoader : getClass().getClassLoader();
    +    loadConversions();
       }
     
       /** Return the class loader that's used (by subclasses). */
    @@ -121,9 +127,20 @@ public ClassLoader getClassLoader() {
         return classLoader;
       }
     
    -  private Map> conversions = new HashMap<>();
    +  /**
    +   * Use the Java 6 ServiceLoader to load conversions.
    +   *
    +   * @see #addLogicalTypeConversion(Conversion)
    +   */
    +  private void loadConversions() {
    +    for (Conversion conversion : ServiceLoader.load(Conversion.class, classLoader)) {
    +      addLogicalTypeConversion(conversion);
    +    }
    +  }
     
    -  private Map, Map>> conversionsByClass = new IdentityHashMap<>();
    +  private final Map> conversions = new HashMap<>();
    +
    +  private final Map, Map>> conversionsByClass = new IdentityHashMap<>();
     
       public Collection> getConversions() {
         return conversions.values();
    @@ -131,19 +148,17 @@ public Collection> getConversions() {
     
       /**
        * Registers the given conversion to be used when reading and writing with this
    -   * data model.
    +   * data model. Conversions can also be registered automatically, as documented
    +   * on the class {@link Conversion Conversion<T>}.
        *
        * @param conversion a logical type Conversion.
        */
       public void addLogicalTypeConversion(Conversion conversion) {
         conversions.put(conversion.getLogicalTypeName(), conversion);
         Class type = conversion.getConvertedType();
    -    Map> conversions = conversionsByClass.get(type);
    -    if (conversions == null) {
    -      conversions = new LinkedHashMap<>();
    -      conversionsByClass.put(type, conversions);
    -    }
    -    conversions.put(conversion.getLogicalTypeName(), conversion);
    +    Map> conversionsForClass = conversionsByClass.computeIfAbsent(type,
    +        k -> new LinkedHashMap<>());
    +    conversionsForClass.put(conversion.getLogicalTypeName(), conversion);
       }
     
       /**
    @@ -184,15 +199,15 @@ public  Conversion getConversionByClass(Class datumClass, LogicalType l
        * @return the conversion for the logical type, or null
        */
       @SuppressWarnings("unchecked")
    -  public Conversion getConversionFor(LogicalType logicalType) {
    +  public  Conversion getConversionFor(LogicalType logicalType) {
         if (logicalType == null) {
           return null;
         }
    -    return (Conversion) conversions.get(logicalType.getName());
    +    return (Conversion) conversions.get(logicalType.getName());
       }
     
       public static final String FAST_READER_PROP = "org.apache.avro.fastread";
    -  private boolean fastReaderEnabled = "true".equalsIgnoreCase(System.getProperty(FAST_READER_PROP));
    +  private boolean fastReaderEnabled = "true".equalsIgnoreCase(System.getProperty(FAST_READER_PROP, "true"));
       private FastReaderBuilder fastReaderBuilder = null;
     
       public GenericData setFastReaderEnabled(boolean flag) {
    @@ -303,30 +318,16 @@ public String toString() {
         }
       }
     
    -  /** Default implementation of an array. */
    -  @SuppressWarnings(value = "unchecked")
    -  public static class Array extends AbstractList implements GenericArray, Comparable> {
    -    private static final Object[] EMPTY = new Object[0];
    +  public static abstract class AbstractArray extends AbstractList
    +      implements GenericArray, Comparable> {
         private final Schema schema;
    -    private int size;
    -    private Object[] elements = EMPTY;
     
    -    public Array(int capacity, Schema schema) {
    -      if (schema == null || !Type.ARRAY.equals(schema.getType()))
    -        throw new AvroRuntimeException("Not an array schema: " + schema);
    -      this.schema = schema;
    -      if (capacity != 0)
    -        elements = new Object[capacity];
    -    }
    +    protected int size = 0;
     
    -    public Array(Schema schema, Collection c) {
    +    public AbstractArray(Schema schema) {
           if (schema == null || !Type.ARRAY.equals(schema.getType()))
             throw new AvroRuntimeException("Not an array schema: " + schema);
           this.schema = schema;
    -      if (c != null) {
    -        elements = new Object[c.size()];
    -        addAll(c);
    -      }
         }
     
         @Override
    @@ -340,27 +341,31 @@ public int size() {
         }
     
         @Override
    -    public void clear() {
    -      // Let GC do its work
    -      Arrays.fill(elements, 0, size, null);
    +    public void reset() {
           size = 0;
         }
     
         @Override
    -    public void reset() {
    -      size = 0;
    +    public int compareTo(GenericArray that) {
    +      return GenericData.get().compare(this, that, this.getSchema());
         }
     
         @Override
    -    public void prune() {
    -      if (size < elements.length) {
    -        Arrays.fill(elements, size, elements.length, null);
    +    public boolean equals(final Object o) {
    +      if (!(o instanceof Collection)) {
    +        return false;
           }
    +      return GenericData.get().compare(this, o, this.getSchema(), true) == 0;
    +    }
    +
    +    @Override
    +    public int hashCode() {
    +      return super.hashCode();
         }
     
         @Override
         public Iterator iterator() {
    -      return new Iterator() {
    +      return new Iterator<>() {
             private int position = 0;
     
             @Override
    @@ -370,7 +375,7 @@ public boolean hasNext() {
     
             @Override
             public T next() {
    -          return (T) elements[position++];
    +          return AbstractArray.this.get(position++);
             }
     
             @Override
    @@ -380,6 +385,57 @@ public void remove() {
           };
         }
     
    +    @Override
    +    public void reverse() {
    +      int left = 0;
    +      int right = size - 1;
    +
    +      while (left < right) {
    +        this.swap(left, right);
    +
    +        left++;
    +        right--;
    +      }
    +    }
    +
    +    protected abstract void swap(int index1, int index2);
    +  }
    +
    +  /** Default implementation of an array. */
    +  @SuppressWarnings(value = "unchecked")
    +  public static class Array extends AbstractArray {
    +    private static final Object[] EMPTY = new Object[0];
    +
    +    private Object[] elements = EMPTY;
    +
    +    public Array(int capacity, Schema schema) {
    +      super(schema);
    +      if (capacity != 0)
    +        elements = new Object[capacity];
    +    }
    +
    +    public Array(Schema schema, Collection c) {
    +      super(schema);
    +      if (c != null) {
    +        elements = new Object[c.size()];
    +        addAll(c);
    +      }
    +    }
    +
    +    @Override
    +    public void clear() {
    +      // Let GC do its work
    +      Arrays.fill(elements, 0, size, null);
    +      size = 0;
    +    }
    +
    +    @Override
    +    public void prune() {
    +      if (size < elements.length) {
    +        Arrays.fill(elements, size, elements.length, null);
    +      }
    +    }
    +
         @Override
         public T get(int i) {
           if (i >= size)
    @@ -428,23 +484,10 @@ public T peek() {
         }
     
         @Override
    -    public int compareTo(GenericArray that) {
    -      return GenericData.get().compare(this, that, this.getSchema());
    -    }
    -
    -    @Override
    -    public void reverse() {
    -      int left = 0;
    -      int right = elements.length - 1;
    -
    -      while (left < right) {
    -        Object tmp = elements[left];
    -        elements[left] = elements[right];
    -        elements[right] = tmp;
    -
    -        left++;
    -        right--;
    -      }
    +    protected void swap(final int index1, final int index2) {
    +      Object tmp = elements[index1];
    +      elements[index1] = elements[index2];
    +      elements[index2] = tmp;
         }
       }
     
    @@ -509,8 +552,8 @@ public int compareTo(Fixed that) {
     
       /** Default implementation of {@link GenericEnumSymbol}. */
       public static class EnumSymbol implements GenericEnumSymbol {
    -    private Schema schema;
    -    private String symbol;
    +    private final Schema schema;
    +    private final String symbol;
     
         public EnumSymbol(Schema schema, String symbol) {
           this.schema = schema;
    @@ -704,7 +747,7 @@ protected void toString(Object datum, StringBuilder buffer, IdentityHashMap a = (Collection) o;
    +        Schema elementType = s.getElementType();
    +        for (Object e : a) {
    +          if (this.shouldStop()) {
    +            return currentHashCode;
    +          }
    +          currentHashCode = this.hashCodeAdd(e, elementType);
    +        }
    +        return currentHashCode;
    +      case UNION:
    +        return hashCode(o, s.getTypes().get(GenericData.this.resolveUnion(s, o)));
    +      case ENUM:
    +        return s.getEnumOrdinal(o.toString());
    +      case NULL:
    +        return 0;
    +      case STRING:
    +        return (o instanceof Utf8 ? o : new Utf8(o.toString())).hashCode();
    +      default:
    +        return o.hashCode();
           }
    -      return hashCode;
    -    case ARRAY:
    -      Collection a = (Collection) o;
    -      Schema elementType = s.getElementType();
    -      for (Object e : a)
    -        hashCode = hashCodeAdd(hashCode, e, elementType);
    -      return hashCode;
    -    case UNION:
    -      return hashCode(o, s.getTypes().get(resolveUnion(s, o)));
    -    case ENUM:
    -      return s.getEnumOrdinal(o.toString());
    -    case NULL:
    -      return 0;
    -    case STRING:
    -      return (o instanceof Utf8 ? o : new Utf8(o.toString())).hashCode();
    -    default:
    -      return o.hashCode();
         }
    -  }
     
    -  /** Add the hash code for an object into an accumulated hash code. */
    -  protected int hashCodeAdd(int hashCode, Object o, Schema s) {
    -    return 31 * hashCode + hashCode(o, s);
    +    /** Add the hash code for an object into an accumulated hash code. */
    +    protected int hashCodeAdd(Object o, Schema s) {
    +      return 31 * this.currentHashCode + hashCode(o, s);
    +    }
    +
    +    private boolean shouldStop() {
    +      return --counter <= 0;
    +    }
       }
     
       /**
    @@ -1136,6 +1202,71 @@ public int compare(Object o1, Object o2, Schema s) {
         return compare(o1, o2, s, false);
       }
     
    +  protected int compareMaps(final Map m1, final Map m2) {
    +    if (m1 == m2) {
    +      return 0;
    +    }
    +
    +    if (m1.isEmpty() && m2.isEmpty()) {
    +      return 0;
    +    }
    +
    +    if (m1.size() != m2.size()) {
    +      return 1;
    +    }
    +
    +    /**
    +     * Peek at keys, assuming they're all the same type within a Map
    +     */
    +    final Object key1 = m1.keySet().iterator().next();
    +    final Object key2 = m2.keySet().iterator().next();
    +    boolean utf8ToString = false;
    +    boolean stringToUtf8 = false;
    +
    +    if (key1 instanceof Utf8 && key2 instanceof String) {
    +      utf8ToString = true;
    +    } else if (key1 instanceof String && key2 instanceof Utf8) {
    +      stringToUtf8 = true;
    +    }
    +
    +    try {
    +      for (Map.Entry e : m1.entrySet()) {
    +        final Object key = e.getKey();
    +        Object lookupKey = key;
    +        if (utf8ToString) {
    +          lookupKey = key.toString();
    +        } else if (stringToUtf8) {
    +          lookupKey = new Utf8((String) lookupKey);
    +        }
    +        final Object value = e.getValue();
    +        if (value == null) {
    +          if (!(m2.get(lookupKey) == null && m2.containsKey(lookupKey))) {
    +            return 1;
    +          }
    +        } else {
    +          final Object value2 = m2.get(lookupKey);
    +          if (value instanceof Utf8 && value2 instanceof String) {
    +            if (!value.toString().equals(value2)) {
    +              return 1;
    +            }
    +          } else if (value instanceof String && value2 instanceof Utf8) {
    +            if (!new Utf8((String) value).equals(value2)) {
    +              return 1;
    +            }
    +          } else {
    +            if (!value.equals(value2)) {
    +              return 1;
    +            }
    +          }
    +        }
    +      }
    +    } catch (ClassCastException | NullPointerException unused) {
    +      return 1;
    +    }
    +
    +    return 0;
    +  }
    +
       /**
        * Comparison implementation. When equals is true, only checks for equality, not
        * for order.
    @@ -1172,7 +1303,7 @@ protected int compare(Object o1, Object o2, Schema s, boolean equals) {
           return e1.hasNext() ? 1 : (e2.hasNext() ? -1 : 0);
         case MAP:
           if (equals)
    -        return o1.equals(o2) ? 0 : 1;
    +        return compareMaps((Map) o1, (Map) o2);
           throw new AvroRuntimeException("Can't compare maps!");
         case UNION:
           int i1 = resolveUnion(s, o1);
    @@ -1181,15 +1312,15 @@ protected int compare(Object o1, Object o2, Schema s, boolean equals) {
         case NULL:
           return 0;
         case STRING:
    -      Utf8 u1 = o1 instanceof Utf8 ? (Utf8) o1 : new Utf8(o1.toString());
    -      Utf8 u2 = o2 instanceof Utf8 ? (Utf8) o2 : new Utf8(o2.toString());
    -      return u1.compareTo(u2);
    +      CharSequence cs1 = o1 instanceof CharSequence ? (CharSequence) o1 : o1.toString();
    +      CharSequence cs2 = o2 instanceof CharSequence ? (CharSequence) o2 : o2.toString();
    +      return Utf8.compareSequences(cs1, cs2);
         default:
           return ((Comparable) o1).compareTo(o2);
         }
       }
     
    -  private final Map defaultValueCache = Collections.synchronizedMap(new WeakHashMap<>());
    +  private final ConcurrentMap defaultValueCache = new ConcurrentReferenceHashMap<>(128, WEAK);
     
       /**
        * Gets the default value of the given field, if any.
    @@ -1209,28 +1340,20 @@ public Object getDefaultValue(Field field) {
         }
     
         // Check the cache
    -    Object defaultValue = defaultValueCache.get(field);
    -
         // If not cached, get the default Java value by encoding the default JSON
         // value and then decoding it:
    -    if (defaultValue == null)
    +    return defaultValueCache.computeIfAbsent(field, fieldToGetValueFor -> {
           try {
             ByteArrayOutputStream baos = new ByteArrayOutputStream();
             BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null);
    -        Accessor.encode(encoder, field.schema(), json);
    +        Accessor.encode(encoder, fieldToGetValueFor.schema(), json);
             encoder.flush();
             BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(baos.toByteArray(), null);
    -        defaultValue = createDatumReader(field.schema()).read(null, decoder);
    -
    -        // this MAY result in two threads creating the same defaultValue
    -        // and calling put. The last thread will win. However,
    -        // that's not an issue.
    -        defaultValueCache.put(field, defaultValue);
    +        return createDatumReader(fieldToGetValueFor.schema()).read(null, decoder);
           } catch (IOException e) {
             throw new AvroRuntimeException(e);
           }
    -
    -    return defaultValue;
    +    });
       }
     
       private static final Schema STRINGS = Schema.create(Type.STRING);
    @@ -1391,22 +1514,73 @@ else if (value instanceof Utf8) {
     
       }
     
    -  /*
    +  /**
        * Called to create new array instances. Subclasses may override to use a
    -   * different array implementation. By default, this returns a {@link
    -   * GenericData.Array}.
    +   * different array implementation. By default, this returns a
    +   * {@link GenericData.Array}.
    +   *
    +   * @param old    the old array instance to reuse, if possible. If the old array
    +   *               is an appropriate type, it may be cleared and returned.
    +   * @param size   the size of the array to create.
    +   * @param schema the schema of the array elements.
        */
       public Object newArray(Object old, int size, Schema schema) {
    -    if (old instanceof GenericArray) {
    -      ((GenericArray) old).reset();
    -      return old;
    -    } else if (old instanceof Collection) {
    -      ((Collection) old).clear();
    -      return old;
    -    } else
    -      return new GenericData.Array(size, schema);
    +    final var logicalType = schema.getElementType().getLogicalType();
    +    final var conversion = getConversionFor(logicalType);
    +    final var optimalValueType = optimalValueType(schema, logicalType,
    +        conversion == null ? null : conversion.getConvertedType());
    +
    +    if (old != null) {
    +      if (old instanceof GenericData.Array) {
    +        ((GenericData.Array) old).reset();
    +        return old;
    +      } else if (old instanceof PrimitiveArray) {
    +        var primitiveOld = (PrimitiveArray) old;
    +        if (primitiveOld.valueType() == optimalValueType) {
    +          primitiveOld.reset();
    +          return old;
    +        }
    +      } else if (old instanceof Collection) {
    +        ((Collection) old).clear();
    +        return old;
    +      }
    +    }
    +    // we can't reuse the old array, so we create a new one
    +    return PrimitivesArrays.createOptimizedArray(size, schema, optimalValueType);
    +  }
    +
    +  /**
    +   * Determine the optimal value type for an array. The value type is determined
    +   * form the convertedElementType if supplied, otherwise the underlying type from
    +   * the schema
    +   *
    +   * @param schema               the schema of the array
    +   * @param convertedElementType the converted elements value type. This may not
    +   *                             be the same and the schema if for instance there
    +   *                             is a logical type, and a convertor is use
    +   * @return an indicator for the type of the array, useful for
    +   *         {@link PrimitivesArrays#createOptimizedArray(int, Schema, Schema.Type)}.
    +   *         May be null if the type is not optimised
    +   */
    +  public static Schema.Type optimalValueType(Schema schema, LogicalType logicalType, Class convertedElementType) {
    +    if (logicalType == null)
    +      // if there are no logical types- use the schema type
    +      return schema.getElementType().getType();
    +    else if (convertedElementType == null)
    +      // if there is no convertor
    +      return null;
    +    else
    +      // use the converted type
    +      return PRIMITIVE_TYPES_WITH_SPECIALISED_ARRAYS.get(convertedElementType);
       }
     
    +  private final static Map, Schema.Type> PRIMITIVE_TYPES_WITH_SPECIALISED_ARRAYS = Map.of(//
    +      Long.TYPE, Schema.Type.LONG, //
    +      Integer.TYPE, Schema.Type.INT, //
    +      Float.TYPE, Schema.Type.FLOAT, //
    +      Double.TYPE, Schema.Type.DOUBLE, //
    +      Boolean.TYPE, Schema.Type.BOOLEAN);
    +
       /**
        * Called to create new array instances. Subclasses may override to use a
        * different map implementation. By default, this returns a {@link HashMap}.
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java
    index 83e5e80966f..178ca50ccad 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java
    @@ -17,16 +17,21 @@
      */
     package org.apache.avro.generic;
     
    +import java.io.EOFException;
     import java.io.IOException;
    -import java.nio.ByteBuffer;
     import java.lang.reflect.Constructor;
    -import java.lang.reflect.InvocationTargetException;
    +import java.nio.ByteBuffer;
     import java.util.Collection;
    +import java.util.Collections;
     import java.util.HashMap;
     import java.util.IdentityHashMap;
     import java.util.Map;
    +import java.util.Set;
    +import java.util.concurrent.ConcurrentHashMap;
    +import java.util.function.Function;
     
     import org.apache.avro.AvroRuntimeException;
    +import org.apache.avro.AvroTypeException;
     import org.apache.avro.Conversion;
     import org.apache.avro.Conversions;
     import org.apache.avro.LogicalType;
    @@ -38,6 +43,7 @@
     import org.apache.avro.io.ResolvingDecoder;
     import org.apache.avro.util.Utf8;
     import org.apache.avro.util.WeakIdentityHashMap;
    +import org.apache.avro.util.internal.ThreadLocalWithInitial;
     
     /** {@link DatumReader} for generic Java objects. */
     public class GenericDatumReader implements DatumReader {
    @@ -105,8 +111,8 @@ public void setExpected(Schema reader) {
         creatorResolver = null;
       }
     
    -  private static final ThreadLocal>> RESOLVER_CACHE = ThreadLocal
    -      .withInitial(WeakIdentityHashMap::new);
    +  private static final ThreadLocal>> RESOLVER_CACHE = ThreadLocalWithInitial
    +      .of(WeakIdentityHashMap::new);
     
       /**
        * Gets a resolving decoder for use by this GenericDatumReader. Unstable API.
    @@ -210,7 +216,7 @@ protected Object readWithoutConversion(Object old, Schema expected, ResolvingDec
       }
     
       /**
    -   * Convert a underlying representation of a logical type (such as a ByteBuffer)
    +   * Convert an underlying representation of a logical type (such as a ByteBuffer)
        * to a higher level object (such as a BigDecimal).
        *
        * @throws IllegalArgumentException if a null schema or logicalType is passed in
    @@ -256,7 +262,12 @@ protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) th
        */
       protected void readField(Object record, Field field, Object oldDatum, ResolvingDecoder in, Object state)
           throws IOException {
    -    data.setField(record, field.name(), field.pos(), read(oldDatum, field.schema(), in), state);
    +    try {
    +      data.setField(record, field.name(), field.pos(), read(oldDatum, field.schema(), in), state);
    +    } catch (AvroTypeException exception) {
    +      String message = "Field \"" + field.name() + "\" content mismatch: " + exception.getMessage();
    +      throw new AvroTypeException(message, exception.getCause());
    +    }
       }
     
       /**
    @@ -284,6 +295,7 @@ protected Object readArray(Object old, Schema expected, ResolvingDecoder in) thr
         long l = in.readArrayStart();
         long base = 0;
         if (l > 0) {
    +      ensureAvailableCollectionBytes(in, l, expectedType);
           LogicalType logicalType = expectedType.getLogicalType();
           Conversion conversion = getData().getConversionFor(logicalType);
           Object array = newArray(old, (int) l, expected);
    @@ -299,13 +311,23 @@ protected Object readArray(Object old, Schema expected, ResolvingDecoder in) thr
               }
             }
             base += l;
    -      } while ((l = in.arrayNext()) > 0);
    +      } while ((l = arrayNext(in, expectedType)) > 0);
           return pruneArray(array);
         } else {
           return pruneArray(newArray(old, 0, expected));
         }
       }
     
    +  /**
    +   * Reads the next array block count and validates remaining bytes before the
    +   * caller allocates storage.
    +   */
    +  private long arrayNext(ResolvingDecoder in, Schema elementType) throws IOException {
    +    long l = in.arrayNext();
    +    ensureAvailableCollectionBytes(in, l, elementType);
    +    return l;
    +  }
    +
       private Object pruneArray(Object object) {
         if (object instanceof GenericArray) {
           ((GenericArray) object).prune();
    @@ -341,6 +363,7 @@ protected Object readMap(Object old, Schema expected, ResolvingDecoder in) throw
         long l = in.readMapStart();
         LogicalType logicalType = eValue.getLogicalType();
         Conversion conversion = getData().getConversionFor(logicalType);
    +    ensureAvailableMapBytes(in, l, eValue);
         Object map = newMap(old, (int) l);
         if (l > 0) {
           do {
    @@ -354,11 +377,39 @@ protected Object readMap(Object old, Schema expected, ResolvingDecoder in) throw
                 addToMap(map, readMapKey(null, expected, in), readWithoutConversion(null, eValue, in));
               }
             }
    -      } while ((l = in.mapNext()) > 0);
    +      } while ((l = mapNext(in, eValue)) > 0);
         }
         return map;
       }
     
    +  /**
    +   * Reads the next map block count and validates remaining bytes before the
    +   * caller allocates storage.
    +   */
    +  private long mapNext(ResolvingDecoder in, Schema valueType) throws IOException {
    +    long l = in.mapNext();
    +    ensureAvailableMapBytes(in, l, valueType);
    +    return l;
    +  }
    +
    +  /**
    +   * Validates remaining bytes for a map block. Each map entry has a string key
    +   * (at least 1 byte for the length varint) plus a value, so the minimum bytes
    +   * per entry is {@code 1 + minBytesPerElement(valueSchema)}.
    +   */
    +  private static void ensureAvailableMapBytes(Decoder decoder, long count, Schema valueSchema) throws EOFException {
    +    if (count <= 0) {
    +      return;
    +    }
    +    // Map keys are always strings: at least 1 byte for the length varint
    +    long minBytesPerEntry = 1L + minBytesPerElement(valueSchema);
    +    int remaining = decoder.remainingBytes();
    +    if (remaining >= 0 && count * minBytesPerEntry > remaining) {
    +      throw new EOFException("Map claims " + count + " entries with at least " + minBytesPerEntry
    +          + " bytes each, but only " + remaining + " bytes are available");
    +    }
    +  }
    +
       /**
        * Called by the default implementation of {@link #readMap} to read a key value.
        * The default implementation returns delegates to
    @@ -377,6 +428,76 @@ protected void addToMap(Object map, Object key, Object value) {
         ((Map) map).put(key, value);
       }
     
    +  /**
    +   * Returns the minimum number of bytes required to encode a single value of the
    +   * given schema in Avro binary format. Used to validate that the decoder has
    +   * enough data remaining before allocating collection backing arrays.
    +   * 

    + * Returns 0 for types whose binary encoding is empty ({@code null}, zero-length + * {@code fixed}, records with only zero-byte fields). Returns a positive value + * for all other types. + */ + static int minBytesPerElement(Schema schema) { + return minBytesPerElement(schema, Collections.newSetFromMap(new IdentityHashMap<>())); + } + + private static int minBytesPerElement(Schema schema, Set visited) { + switch (schema.getType()) { + case NULL: + return 0; + case FIXED: + return schema.getFixedSize(); + case FLOAT: + return 4; + case DOUBLE: + return 8; + case RECORD: + if (!visited.add(schema)) { + return 0; // break recursion for self-referencing schemas + } + long sum = 0; + for (Schema.Field f : schema.getFields()) { + sum += minBytesPerElement(f.schema(), visited); + if (sum >= Integer.MAX_VALUE) { + sum = Integer.MAX_VALUE; + break; + } + } + visited.remove(schema); + return (int) sum; + case UNION: + // The branch index varint is always at least 1 byte + return 1; + default: + // BOOLEAN, INT, LONG, ENUM, STRING, BYTES, ARRAY, MAP are all >= 1 byte + return 1; + } + } + + /** + * Validates that the decoder has enough remaining bytes to hold {@code count} + * elements of the given schema, assuming each element requires at least + * {@link #minBytesPerElement} bytes. Throws {@link EOFException} if the decoder + * reports fewer remaining bytes than required. + *

    + * This check prevents out-of-memory errors from pre-allocating huge backing + * arrays when the source data is truncated or malicious. + */ + private static void ensureAvailableCollectionBytes(Decoder decoder, long count, Schema elementSchema) + throws EOFException { + if (count <= 0) { + return; + } + int minBytes = minBytesPerElement(elementSchema); + if (minBytes > 0) { + int remaining = decoder.remainingBytes(); + if (remaining >= 0 && count * (long) minBytes > remaining) { + throw new EOFException("Collection claims " + count + " elements with at least " + minBytes + + " bytes each, but only " + remaining + " bytes are available"); + } + } + } + /** * Called to read a fixed value. May be overridden for alternate fixed * representations. By default, returns {@link GenericFixed}. @@ -451,14 +572,14 @@ protected Object newMap(Object old, int size) { * representation. By default, this calls {@link #readString(Object,Decoder)}. */ protected Object readString(Object old, Schema expected, Decoder in) throws IOException { - Class stringClass = getStringClass(expected); + Class stringClass = this.getReaderCache().getStringClass(expected); if (stringClass == String.class) { return in.readString(); } if (stringClass == CharSequence.class) { return readString(old, in); } - return newInstanceFromString(stringClass, in.readString()); + return this.newInstanceFromString(stringClass, in.readString()); } /** @@ -497,32 +618,86 @@ protected Class findStringClass(Schema schema) { } } - private Map stringClassCache = new IdentityHashMap<>(); + /** + * This class is used to reproduce part of IdentityHashMap in ConcurrentHashMap + * code. + */ + private static final class IdentitySchemaKey { + private final Schema schema; + + private final int hashcode; + + public IdentitySchemaKey(Schema schema) { + this.schema = schema; + this.hashcode = System.identityHashCode(schema); + } - private Class getStringClass(Schema s) { - Class c = stringClassCache.get(s); - if (c == null) { - c = findStringClass(s); - stringClassCache.put(s, c); + @Override + public int hashCode() { + return this.hashcode; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof GenericDatumReader.IdentitySchemaKey)) { + return false; + } + IdentitySchemaKey key = (IdentitySchemaKey) obj; + return this == key || this.schema == key.schema; } - return c; } - private final Map stringCtorCache = new HashMap<>(); + // VisibleForTesting + static class ReaderCache { + private final Map stringClassCache = new ConcurrentHashMap<>(); - @SuppressWarnings("unchecked") - protected Object newInstanceFromString(Class c, String s) { - try { - Constructor ctor = stringCtorCache.get(c); - if (ctor == null) { + private final Map> stringCtorCache = new ConcurrentHashMap<>(); + + private final Function findStringClass; + + public ReaderCache(Function findStringClass) { + this.findStringClass = findStringClass; + } + + public Object newInstanceFromString(Class c, String s) { + final Function ctor = stringCtorCache.computeIfAbsent(c, this::buildFunction); + return ctor.apply(s); + } + + private Function buildFunction(Class c) { + final Constructor ctor; + try { ctor = c.getDeclaredConstructor(String.class); - ctor.setAccessible(true); - stringCtorCache.put(c, ctor); + } catch (NoSuchMethodException e) { + throw new AvroRuntimeException(e); } - return ctor.newInstance(s); - } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException | InstantiationException e) { - throw new AvroRuntimeException(e); + ctor.setAccessible(true); + + return (String s) -> { + try { + return ctor.newInstance(s); + } catch (ReflectiveOperationException e) { + throw new AvroRuntimeException(e); + } + }; } + + public Class getStringClass(final Schema s) { + final IdentitySchemaKey key = new IdentitySchemaKey(s); + return this.stringClassCache.computeIfAbsent(key, (IdentitySchemaKey k) -> this.findStringClass.apply(k.schema)); + } + } + + private final ReaderCache readerCache = new ReaderCache(this::findStringClass); + + // VisibleForTesting + ReaderCache getReaderCache() { + return readerCache; + } + + @SuppressWarnings("unchecked") + protected Object newInstanceFromString(Class c, String s) { + return this.getReaderCache().newInstanceFromString(c, s); } /** diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java index 77d01e98c2c..20a856c4dc3 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java @@ -32,9 +32,17 @@ import org.apache.avro.LogicalType; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; +import org.apache.avro.path.TracingAvroTypeException; import org.apache.avro.UnresolvedUnionException; import org.apache.avro.io.DatumWriter; import org.apache.avro.io.Encoder; +import org.apache.avro.path.ArrayPositionPredicate; +import org.apache.avro.path.LocationStep; +import org.apache.avro.path.MapKeyPredicate; +import org.apache.avro.path.TracingClassCastException; +import org.apache.avro.path.TracingNullPointException; +import org.apache.avro.path.UnionTypePredicate; +import org.apache.avro.util.SchemaUtil; /** {@link DatumWriter} for generic Java objects. */ public class GenericDatumWriter implements DatumWriter { @@ -70,7 +78,11 @@ public void setSchema(Schema root) { public void write(D datum, Encoder out) throws IOException { Objects.requireNonNull(out, "Encoder cannot be null"); - write(root, datum, out); + try { + write(root, datum, out); + } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) { + throw e.summarize(root); + } } /** Called to write data. */ @@ -86,7 +98,7 @@ protected void write(Schema schema, Object datum, Encoder out) throws IOExceptio /** * Convert a high level representation of a logical type (such as a BigDecimal) - * to the its underlying representation object (such as a ByteBuffer). + * to its underlying representation object (such as a ByteBuffer). * * @throws IllegalArgumentException if a null schema or logicalType is passed in * while datum and conversion are not null. @@ -125,8 +137,10 @@ protected Object convert(Schema schema, LogicalType logicalType, Conversion< /** Called to write data. */ protected void writeWithoutConversion(Schema schema, Object datum, Encoder out) throws IOException { + int unionIndex = -1; + Schema.Type schemaType = schema.getType(); try { - switch (schema.getType()) { + switch (schemaType) { case RECORD: writeRecord(schema, datum, out); break; @@ -140,9 +154,9 @@ protected void writeWithoutConversion(Schema schema, Object datum, Encoder out) writeMap(schema, datum, out); break; case UNION: - int index = resolveUnion(schema, datum); - out.writeIndex(index); - write(schema.getTypes().get(index), datum, out); + unionIndex = resolveUnion(schema, datum); + out.writeIndex(unionIndex); + write(schema.getTypes().get(unionIndex), datum, out); break; case FIXED: writeFixed(schema, datum, out); @@ -174,8 +188,18 @@ protected void writeWithoutConversion(Schema schema, Object datum, Encoder out) default: error(schema, datum); } + } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) { + if (schemaType == Schema.Type.UNION) { + e.tracePath(new UnionTypePredicate(schema.getTypes().get(unionIndex).getName())); + } + // writeArray() and writeMap() have their own handling + throw e; } catch (NullPointerException e) { - throw npe(e, " of " + schema.getFullName()); + throw new TracingNullPointException(e, schema, false); + } catch (ClassCastException e) { + throw new TracingClassCastException(e, datum, schema, false); + } catch (AvroTypeException e) { + throw new TracingAvroTypeException(e); } } @@ -195,9 +219,7 @@ protected ClassCastException addClassCastMsg(ClassCastException e, String s) { /** Helper method for adding a message to an Avro Type Exception . */ protected AvroTypeException addAvroTypeMsg(AvroTypeException e, String s) { - AvroTypeException result = new AvroTypeException(e.getMessage() + s); - result.initCause(e.getCause() == null ? e : e.getCause()); - return result; + return new AvroTypeException(e.getMessage() + s, e.getCause() == null ? e : e.getCause()); } /** @@ -223,6 +245,9 @@ protected void writeField(Object datum, Field f, Encoder out, Object state) thro final UnresolvedUnionException unresolvedUnionException = new UnresolvedUnionException(f.schema(), f, value); unresolvedUnionException.addSuppressed(uue); throw unresolvedUnionException; + } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) { + e.tracePath(new LocationStep(".", f.name())); + throw e; } catch (NullPointerException e) { throw npe(e, " in field " + f.name()); } catch (ClassCastException cce) { @@ -237,8 +262,11 @@ protected void writeField(Object datum, Field f, Encoder out, Object state) thro * representations. */ protected void writeEnum(Schema schema, Object datum, Encoder out) throws IOException { - if (!data.isEnum(datum)) - throw new AvroTypeException("Not an enum: " + datum + " for schema: " + schema); + if (!data.isEnum(datum)) { + AvroTypeException cause = new AvroTypeException( + "value " + SchemaUtil.describe(datum) + " is not a " + SchemaUtil.describe(schema)); + throw new TracingAvroTypeException(cause); + } out.writeEnum(schema.getEnumOrdinal(datum.toString())); } @@ -252,9 +280,14 @@ protected void writeArray(Schema schema, Object datum, Encoder out) throws IOExc long actualSize = 0; out.writeArrayStart(); out.setItemCount(size); - for (Iterator it = getArrayElements(datum); it.hasNext();) { + for (Iterator it = getArrayElements(datum); it.hasNext();) { out.startItem(); - write(element, it.next(), out); + try { + write(element, it.next(), out); + } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) { + e.tracePath(new ArrayPositionPredicate(actualSize)); + throw e; + } actualSize++; } out.writeArrayEnd(); @@ -276,18 +309,16 @@ protected int resolveUnion(Schema union, Object datum) { * Called by the default implementation of {@link #writeArray} to get the size * of an array. The default implementation is for {@link Collection}. */ - @SuppressWarnings("unchecked") protected long getArraySize(Object array) { - return ((Collection) array).size(); + return ((Collection) array).size(); } /** * Called by the default implementation of {@link #writeArray} to enumerate * array elements. The default implementation is for {@link Collection}. */ - @SuppressWarnings("unchecked") - protected Iterator getArrayElements(Object array) { - return ((Collection) array).iterator(); + protected Iterator getArrayElements(Object array) { + return ((Collection) array).iterator(); } /** @@ -301,8 +332,21 @@ protected void writeMap(Schema schema, Object datum, Encoder out) throws IOExcep out.setItemCount(size); for (Map.Entry entry : getMapEntries(datum)) { out.startItem(); - writeString(entry.getKey().toString(), out); - write(value, entry.getValue(), out); + String key; + try { + key = entry.getKey().toString(); + } catch (NullPointerException npe) { + TracingNullPointException tnpe = new TracingNullPointException(npe, Schema.create(Schema.Type.STRING), false); + tnpe.tracePath(new MapKeyPredicate(null)); + throw tnpe; + } + writeString(key, out); + try { + write(value, entry.getValue(), out); + } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) { + e.tracePath(new MapKeyPredicate(key)); + throw e; + } actualSize++; } out.writeMapEnd(); @@ -363,7 +407,7 @@ protected void writeFixed(Schema schema, Object datum, Encoder out) throws IOExc } private void error(Schema schema, Object datum) { - throw new AvroTypeException("Not a " + schema + ": " + datum); + throw new AvroTypeException("value " + SchemaUtil.describe(datum) + " is not a " + SchemaUtil.describe(schema)); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java b/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java new file mode 100644 index 00000000000..d23d46ae9ec --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java @@ -0,0 +1,665 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.generic; + +import org.apache.avro.Schema; + +import java.util.Arrays; +import java.util.Collection; + +public class PrimitivesArrays { + + /** + * Create a primitive array if the value type is has an associated optimised + * implementation, otherwise a generic array is returned. The value type is + * determined form the convertedElementType if supplied, otherwise the + * underlying type from the schema + * + * @param size the size of the array to create + * @param schema the schema of the array + * @param valueType the converted elements value type. This may not be the same + * and the schema if for instance there is a logical type, and + * a convertor is use + * @return an instance of a primitive array or a Generic array if the value type + * is does not have an associated optimised implementation. + */ + public static GenericData.AbstractArray createOptimizedArray(int size, Schema schema, Schema.Type valueType) { + + if (valueType != null) + switch (valueType) { + case INT: + return new PrimitivesArrays.IntArray(size, schema); + case BOOLEAN: + return new PrimitivesArrays.BooleanArray(size, schema); + case LONG: + return new PrimitivesArrays.LongArray(size, schema); + case FLOAT: + return new PrimitivesArrays.FloatArray(size, schema); + case DOUBLE: + return new PrimitivesArrays.DoubleArray(size, schema); + default: + break; + } + return new GenericData.Array<>(size, schema); + } + + public abstract static class PrimitiveArray extends GenericData.AbstractArray { + PrimitiveArray(Schema schema) { + super(schema); + } + + public abstract Schema.Type valueType(); + } + + public static class IntArray extends PrimitiveArray { + private static final int[] EMPTY = new int[0]; + + private int[] elements = EMPTY; + + public IntArray(int capacity, Schema schema) { + super(schema); + if (capacity != 0) + elements = new int[capacity]; + } + + public IntArray(Schema schema, Collection c) { + super(schema); + if (c != null) { + elements = new int[c.size()]; + addAll(c); + } + } + + @Override + public void clear() { + size = 0; + } + + @Override + public Integer get(int i) { + return this.getInt(i); + } + + /** + * Direct primitive int access. + * + * @param i : index. + * @return value at index. + */ + public int getInt(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + return elements[i]; + } + + @Override + public void add(int location, Integer o) { + if (o == null) { + return; + } + this.add(location, o.intValue()); + } + + public void add(int location, int o) { + if (location > size || location < 0) { + throw new IndexOutOfBoundsException("Index " + location + " out of bounds."); + } + if (size == elements.length) { + // Increase size by 1.5x + 1 + final int newSize = size + (size >> 1) + 1; + elements = Arrays.copyOf(elements, newSize); + } + System.arraycopy(elements, location, elements, location + 1, size - location); + elements[location] = o; + size++; + } + + @Override + public Integer set(int i, Integer o) { + if (o == null) { + return null; + } + return this.set(i, o.intValue()); + } + + public int set(int i, int o) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + int response = elements[i]; + elements[i] = o; + return response; + } + + @Override + public Integer remove(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + int result = elements[i]; + --size; + System.arraycopy(elements, i + 1, elements, i, (size - i)); + return result; + } + + @Override + public Integer peek() { + return (size < elements.length) ? elements[size] : null; + } + + @Override + protected void swap(final int index1, final int index2) { + int tmp = elements[index1]; + elements[index1] = elements[index2]; + elements[index2] = tmp; + } + + @Override + public Schema.Type valueType() { + return Schema.Type.INT; + } + } + + public static class LongArray extends PrimitiveArray { + private static final long[] EMPTY = new long[0]; + + private long[] elements = EMPTY; + + public LongArray(int capacity, Schema schema) { + super(schema); + if (capacity != 0) + elements = new long[capacity]; + } + + public LongArray(Schema schema, Collection c) { + super(schema); + if (c != null) { + elements = new long[c.size()]; + addAll(c); + } + } + + @Override + public void clear() { + size = 0; + } + + @Override + public Long get(int i) { + return getLong(i); + } + + /** + * Direct primitive int access. + * + * @param i : index. + * @return value at index. + */ + public long getLong(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + return elements[i]; + } + + @Override + public void add(int location, Long o) { + if (o == null) { + return; + } + this.add(location, o.longValue()); + } + + public void add(int location, long o) { + if (location > size || location < 0) { + throw new IndexOutOfBoundsException("Index " + location + " out of bounds."); + } + if (size == elements.length) { + // Increase size by 1.5x + 1 + final int newSize = size + (size >> 1) + 1; + elements = Arrays.copyOf(elements, newSize); + } + System.arraycopy(elements, location, elements, location + 1, size - location); + elements[location] = o; + size++; + } + + @Override + public Long set(int i, Long o) { + if (o == null) { + return null; + } + return this.set(i, o.longValue()); + } + + public long set(int i, long o) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + long response = elements[i]; + elements[i] = o; + return response; + } + + @Override + public Long remove(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + long result = elements[i]; + --size; + System.arraycopy(elements, i + 1, elements, i, (size - i)); + return result; + } + + @Override + public Long peek() { + return (size < elements.length) ? elements[size] : null; + } + + @Override + protected void swap(final int index1, final int index2) { + long tmp = elements[index1]; + elements[index1] = elements[index2]; + elements[index2] = tmp; + } + + @Override + public Schema.Type valueType() { + return Schema.Type.LONG; + } + } + + public static class BooleanArray extends PrimitiveArray { + private static final byte[] EMPTY = new byte[0]; + + private byte[] elements = EMPTY; + + public BooleanArray(int capacity, Schema schema) { + super(schema); + if (capacity != 0) + elements = new byte[1 + (capacity / Byte.SIZE)]; + } + + public BooleanArray(Schema schema, Collection c) { + super(schema); + + if (c != null) { + elements = new byte[1 + (c.size() / 8)]; + if (c instanceof BooleanArray) { + BooleanArray other = (BooleanArray) c; + this.size = other.size; + System.arraycopy(other.elements, 0, this.elements, 0, other.elements.length); + } else { + addAll(c); + } + } + } + + @Override + public void clear() { + size = 0; + } + + @Override + public Boolean get(int i) { + return this.getBoolean(i); + } + + /** + * Direct primitive int access. + * + * @param i : index. + * @return value at index. + */ + public boolean getBoolean(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + return (elements[i / 8] & (1 << (i % 8))) > 0; + } + + @Override + public boolean add(final Boolean o) { + if (o == null) { + return false; + } + return this.add(o.booleanValue()); + } + + public boolean add(final boolean o) { + if (this.size == elements.length * 8) { + final int newLength = elements.length + (elements.length >> 1) + 1; + elements = Arrays.copyOf(elements, newLength); + } + this.size++; + this.set(this.size - 1, o); + return true; + } + + @Override + public void add(int location, Boolean o) { + if (o == null) { + return; + } + this.add(location, o.booleanValue()); + } + + public void add(int location, boolean o) { + if (location > size || location < 0) { + throw new IndexOutOfBoundsException("Index " + location + " out of bounds."); + } + if (size == elements.length * 8) { + // Increase size by 1.5x + 1 + final int newLength = elements.length + (elements.length >> 1) + 1; + elements = Arrays.copyOf(elements, newLength); + } + size++; + for (int index = this.size / 8; index > (location / 8); index--) { + elements[index] <<= 1; + if ((elements[index - 1] & (1 << Byte.SIZE)) > 0) { + elements[index] |= 1; + } + } + byte pos = (byte) (1 << (location % Byte.SIZE)); + byte highbits = (byte) ~(pos + (pos - 1)); + byte lowbits = (byte) (pos - 1); + byte currentHigh = (byte) ((elements[location / 8] & highbits) << 1); + byte currentLow = (byte) (elements[location / 8] & lowbits); + if (o) { + elements[location / 8] = (byte) (currentHigh | currentLow | pos); + } else { + elements[location / 8] = (byte) (currentHigh | currentLow); + } + + } + + @Override + public Boolean set(int i, Boolean o) { + if (o == null) { + return null; + } + return this.set(i, o.booleanValue()); + } + + public boolean set(int i, boolean o) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + boolean response = (elements[i / 8] & (1 << (i % 8))) > 0; + if (o) { + elements[i / 8] |= 1 << (i % 8); + } else { + elements[i / 8] &= 0xFF - (1 << (i % 8)); + } + return response; + } + + @Override + public Boolean remove(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + boolean result = (elements[(i / 8)] & (1 << (i % 8))) > 0; + --size; + + byte memo = 0; + if ((i / 8) + 1 < elements.length) { + memo = (byte) ((1 & (elements[(i / 8) + 1])) << 7); + } + for (int index = (i / 8) + 1; index <= (size / 8); index++) { + elements[index] = (byte) ((elements[index] & 0xff) >>> 1); + if (index + 1 < elements.length && (elements[index + 1] & 1) == 1) { + elements[index] |= 1 << (Byte.SIZE - 1); + } + } + // 87654321 => 8764321 + byte start = (byte) ((1 << ((i + 1) % 8)) - 1); + byte end = (byte) ~start; + elements[i / 8] = (byte) ((((start & 0xff) >>> 1) & elements[i / 8]) // 1234 + | (end & (elements[i / 8] >> 1)) // 876 + | memo); + + return result; + } + + @Override + public Boolean peek() { + return (size < elements.length * Byte.SIZE) ? (elements[(size / 8)] & (1 << (size % 8))) > 0 : null; + } + + @Override + protected void swap(final int index1, final int index2) { + boolean tmp = this.get(index1); + this.set(index1, this.get(index2)); + this.set(index2, tmp); + } + + @Override + public Schema.Type valueType() { + return Schema.Type.BOOLEAN; + } + } + + public static class FloatArray extends PrimitiveArray { + private static final float[] EMPTY = new float[0]; + + private float[] elements = EMPTY; + + public FloatArray(int capacity, Schema schema) { + super(schema); + if (capacity != 0) + elements = new float[capacity]; + } + + public FloatArray(Schema schema, Collection c) { + super(schema); + if (c != null) { + elements = new float[c.size()]; + addAll(c); + } + } + + @Override + public void clear() { + size = 0; + } + + @Override + public Float get(int i) { + return this.getFloat(i); + } + + /** + * Direct primitive int access. + * + * @param i : index. + * @return value at index. + */ + public float getFloat(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + return elements[i]; + } + + @Override + public void add(int location, Float o) { + if (o == null) { + return; + } + this.add(location, o.floatValue()); + } + + public void add(int location, float o) { + if (location > size || location < 0) { + throw new IndexOutOfBoundsException("Index " + location + " out of bounds."); + } + if (size == elements.length) { + // Increase size by 1.5x + 1 + final int newSize = size + (size >> 1) + 1; + elements = Arrays.copyOf(elements, newSize); + } + System.arraycopy(elements, location, elements, location + 1, size - location); + elements[location] = o; + size++; + } + + @Override + public Float set(int i, Float o) { + if (o == null) { + return null; + } + return this.set(i, o.floatValue()); + } + + public float set(int i, float o) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + float response = elements[i]; + elements[i] = o; + return response; + } + + @Override + public Float remove(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + float result = elements[i]; + --size; + System.arraycopy(elements, i + 1, elements, i, (size - i)); + return result; + } + + @Override + public Float peek() { + return (size < elements.length) ? elements[size] : null; + } + + @Override + protected void swap(final int index1, final int index2) { + float tmp = this.get(index1); + this.set(index1, this.get(index2)); + this.set(index2, tmp); + } + + @Override + public Schema.Type valueType() { + return Schema.Type.FLOAT; + } + } + + public static class DoubleArray extends PrimitiveArray { + private static final double[] EMPTY = new double[0]; + + private double[] elements = EMPTY; + + public DoubleArray(int capacity, Schema schema) { + super(schema); + if (capacity != 0) + elements = new double[capacity]; + } + + public DoubleArray(Schema schema, Collection c) { + super(schema); + if (c != null) { + elements = new double[c.size()]; + addAll(c); + } + } + + @Override + public void clear() { + size = 0; + } + + @Override + public Double get(int i) { + return this.getDouble(i); + } + + /** + * Direct primitive int access. + * + * @param i : index. + * @return value at index. + */ + public double getDouble(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + return elements[i]; + } + + @Override + public void add(int location, Double o) { + if (o == null) { + return; + } + this.add(location, o.doubleValue()); + } + + public void add(int location, double o) { + if (location > size || location < 0) { + throw new IndexOutOfBoundsException("Index " + location + " out of bounds."); + } + if (size == elements.length) { + // Increase size by 1.5x + 1 + final int newSize = size + (size >> 1) + 1; + elements = Arrays.copyOf(elements, newSize); + } + System.arraycopy(elements, location, elements, location + 1, size - location); + elements[location] = o; + size++; + } + + @Override + public Double set(int i, Double o) { + if (o == null) { + return null; + } + return this.set(i, o.doubleValue()); + } + + public double set(int i, double o) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + double response = elements[i]; + elements[i] = o; + return response; + } + + @Override + public Double remove(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + double result = elements[i]; + --size; + System.arraycopy(elements, i + 1, elements, i, (size - i)); + return result; + } + + @Override + public Double peek() { + return (size < elements.length) ? elements[size] : null; + } + + @Override + protected void swap(final int index1, final int index2) { + double tmp = this.get(index1); + this.set(index1, this.get(index2)); + this.set(index2, tmp); + } + + @Override + public Schema.Type valueType() { + return Schema.Type.DOUBLE; + } + } + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java index b0fc87ab9b8..0df469d4957 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java @@ -18,11 +18,13 @@ package org.apache.avro.io; import java.io.IOException; +import java.util.Arrays; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.AvroRuntimeException; import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.util.internal.ThreadLocalWithInitial; /** Utilities for binary-encoded data. */ public class BinaryData { @@ -49,7 +51,7 @@ public void clear() { } } // no public ctor - private static final ThreadLocal DECODERS = ThreadLocal.withInitial(Decoders::new); + private static final ThreadLocal DECODERS = ThreadLocalWithInitial.of(Decoders::new); /** * Compare binary encoded data. If equal, return zero. If greater-than, return @@ -177,19 +179,10 @@ private static int compare(Decoders d, Schema schema) throws IOException { /** * Lexicographically compare bytes. If equal, return zero. If greater-than, - * return a positive value, if less than return a negative value. + * return a positive value, if less than, return a negative value. */ public static int compareBytes(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { - int end1 = s1 + l1; - int end2 = s2 + l2; - for (int i = s1, j = s2; i < end1 && j < end2; i++, j++) { - int a = (b1[i] & 0xff); - int b = (b2[j] & 0xff); - if (a != b) { - return a - b; - } - } - return l1 - l2; + return Arrays.compareUnsigned(b1, s1, s1 + l1, b2, s2, s2 + l2); } private static class HashData { @@ -204,7 +197,7 @@ public void set(byte[] bytes, int start, int len) { } } - private static final ThreadLocal HASH_DATA = ThreadLocal.withInitial(HashData::new); + private static final ThreadLocal HASH_DATA = ThreadLocalWithInitial.of(HashData::new); /** * Hash binary encoded data. Consistent with @@ -260,11 +253,11 @@ private static int hashCode(HashData data, Schema schema) throws IOException { case UNION: return hashCode(data, schema.getTypes().get(decoder.readInt())); case FIXED: - return hashBytes(1, data, schema.getFixedSize(), false); + return hashBytes(data, schema.getFixedSize(), false); case STRING: - return hashBytes(0, data, decoder.readInt(), false); + return hashBytes(data, decoder.readInt(), false); case BYTES: - return hashBytes(1, data, decoder.readInt(), true); + return hashBytes(data, decoder.readInt(), true); case NULL: return 0; default: @@ -272,8 +265,8 @@ private static int hashCode(HashData data, Schema schema) throws IOException { } } - private static int hashBytes(int init, HashData data, int len, boolean rev) throws IOException { - int hashCode = init; + private static int hashBytes(HashData data, int len, boolean rev) throws IOException { + int hashCode = 1; byte[] bytes = data.decoder.getBuf(); int start = data.decoder.getPos(); int end = start + len; @@ -297,7 +290,7 @@ public static int skipLong(final byte[] bytes, int start) { /** * Encode a boolean to the byte array at the given position. Will throw * IndexOutOfBounds if the position is not valid. - * + * * @return The number of bytes written to the buffer, 1. */ public static int encodeBoolean(boolean b, byte[] buf, int pos) { @@ -309,7 +302,7 @@ public static int encodeBoolean(boolean b, byte[] buf, int pos) { * Encode an integer to the byte array at the given position. Will throw * IndexOutOfBounds if it overflows. Users should ensure that there are at least * 5 bytes left in the buffer before calling this method. - * + * * @return The number of bytes written to the buffer, between 1 and 5. */ public static int encodeInt(int n, byte[] buf, int pos) { @@ -340,7 +333,7 @@ public static int encodeInt(int n, byte[] buf, int pos) { * Encode a long to the byte array at the given position. Will throw * IndexOutOfBounds if it overflows. Users should ensure that there are at least * 10 bytes left in the buffer before calling this method. - * + * * @return The number of bytes written to the buffer, between 1 and 10. */ public static int encodeLong(long n, byte[] buf, int pos) { @@ -391,7 +384,7 @@ public static int encodeLong(long n, byte[] buf, int pos) { * Encode a float to the byte array at the given position. Will throw * IndexOutOfBounds if it overflows. Users should ensure that there are at least * 4 bytes left in the buffer before calling this method. - * + * * @return Returns the number of bytes written to the buffer, 4. */ public static int encodeFloat(float f, byte[] buf, int pos) { @@ -407,7 +400,7 @@ public static int encodeFloat(float f, byte[] buf, int pos) { * Encode a double to the byte array at the given position. Will throw * IndexOutOfBounds if it overflows. Users should ensure that there are at least * 8 bytes left in the buffer before calling this method. - * + * * @return Returns the number of bytes written to the buffer, 8. */ public static int encodeDouble(double d, byte[] buf, int pos) { diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java index 44d2b764009..77fc8490764 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java @@ -17,18 +17,19 @@ */ package org.apache.avro.io; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.InvalidNumberEncodingException; +import org.apache.avro.SystemLimitException; +import org.apache.avro.util.ByteBufferInputStream; +import org.apache.avro.util.Utf8; + +import java.io.ByteArrayInputStream; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; -import java.nio.Buffer; import java.nio.ByteBuffer; import java.util.Arrays; -import org.apache.avro.AvroRuntimeException; -import org.apache.avro.InvalidNumberEncodingException; -import org.apache.avro.util.Utf8; -import org.slf4j.LoggerFactory; - /** * An {@link Decoder} for binary-format data. *

    @@ -39,27 +40,20 @@ * can be accessed by inputStream().remaining(), if the BinaryDecoder is not * 'direct'. *

    - * To prevent this class from making large allocations when handling potentially - * pathological input data, set Java properties - * org.apache.avro.limits.string.maxLength and - * org.apache.avro.limits.bytes.maxLength before instantiating this - * class to limit the maximum sizes of string and bytes types - * handled. The default is to permit sizes up to Java's maximum array length. * * @see Encoder + * @see SystemLimitException */ public class BinaryDecoder extends Decoder { /** - * The maximum size of array to allocate. Some VMs reserve some header words in - * an array. Attempts to allocate larger arrays may result in OutOfMemoryError: - * Requested array size exceeds VM limit + * When reading a collection (MAP or ARRAY), this keeps track of the number of + * elements to ensure that the + * {@link SystemLimitException#checkMaxCollectionLength} constraint is + * respected. */ - static final long MAX_ARRAY_SIZE = (long) Integer.MAX_VALUE - 8L; - - private static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength"; - private final int maxBytesLength; + private long collectionCount = 0L; private ByteSource source = null; // we keep the buffer and its state variables in this class and not in a @@ -99,17 +93,6 @@ void clearBuf() { /** protected constructor for child classes */ protected BinaryDecoder() { super(); - String o = System.getProperty(MAX_BYTES_LENGTH_PROPERTY); - int i = Integer.MAX_VALUE; - if (o != null) { - try { - i = Integer.parseUnsignedInt(o); - } catch (NumberFormatException nfe) { - LoggerFactory.getLogger(BinaryDecoder.class) - .warn("Could not parse property " + MAX_BYTES_LENGTH_PROPERTY + ": " + o, nfe); - } - } - maxBytesLength = i; } BinaryDecoder(InputStream in, int bufferSize) { @@ -203,10 +186,25 @@ public int readInt() throws IOException { @Override public long readLong() throws IOException { ensureBounds(10); - int b = buf[pos++] & 0xff; - int n = b & 0x7f; + + /* + * Long values are used for many different areas of the spec, for example: a + * string is encoded as a long followed by that many bytes of UTF-8 encoded + * character data. Because of this, long values actually tend to be pretty small + * on average, and so can often fit within the first byte of the variable-length + * array. Therefore, the first byte is prioritized. For the first byte, if the + * high-order bit is set, this indicates there are more bytes to read, but also + * this means a signed value >= 0 does not have any following bytes. + */ long l; - if (b > 0x7f) { + int b, n; + if ((b = buf[pos++]) == 0) { + return 0; + } else if (b > 0) { + // back to two's-complement (zig-zag) + return (b >>> 1) ^ -(b & 1); + } else { + n = b & 0x7f; b = buf[pos++] & 0xff; n ^= (b & 0x7f) << 7; if (b > 0x7f) { @@ -218,7 +216,7 @@ public long readLong() throws IOException { if (b > 0x7f) { // only the low 28 bits can be set, so this won't carry // the sign bit to the long - l = innerLongDecode((long) n); + l = innerLongDecode(n); } else { l = n; } @@ -228,8 +226,6 @@ public long readLong() throws IOException { } else { l = n; } - } else { - l = n; } if (pos > limit) { throw new EOFException(); @@ -300,17 +296,12 @@ public double readDouble() throws IOException { @Override public Utf8 readString(Utf8 old) throws IOException { - long length = readLong(); - if (length > MAX_ARRAY_SIZE) { - throw new UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_SIZE + " bytes"); - } - if (length < 0L) { - throw new AvroRuntimeException("Malformed data. Length is negative: " + length); - } + int length = SystemLimitException.checkMaxStringLength(readLong()); + ensureAvailableBytes(length); Utf8 result = (old != null ? old : new Utf8()); - result.setByteLength((int) length); - if (0L != length) { - doReadBytes(result.getBytes(), 0, (int) length); + result.setByteLength(length); + if (0 != length) { + doReadBytes(result.getBytes(), 0, length); } return result; } @@ -329,25 +320,17 @@ public void skipString() throws IOException { @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { - int length = readInt(); - if (length > MAX_ARRAY_SIZE) { - throw new UnsupportedOperationException("Cannot read arrays longer than " + MAX_ARRAY_SIZE + " bytes"); - } - if (length > maxBytesLength) { - throw new AvroRuntimeException("Bytes length " + length + " exceeds maximum allowed"); - } - if (length < 0L) { - throw new AvroRuntimeException("Malformed data. Length is negative: " + length); - } + int length = SystemLimitException.checkMaxBytesLength(readLong()); + ensureAvailableBytes(length); final ByteBuffer result; if (old != null && length <= old.capacity()) { result = old; - ((Buffer) result).clear(); + result.clear(); } else { result = ByteBuffer.allocate(length); } doReadBytes(result.array(), result.position(), length); - ((Buffer) result).limit(length); + result.limit(length); return result; } @@ -372,6 +355,9 @@ public int readEnum() throws IOException { } protected void doSkipBytes(long length) throws IOException { + if (length <= 0) { + return; + } int remaining = limit - pos; if (length <= remaining) { pos = (int) (pos + length); @@ -443,7 +429,6 @@ protected long doReadItemCount() throws IOException { * @return Zero if there are no more items to skip and end of array/map is * reached. Positive number if some items are found that cannot be * skipped and the client needs to skip them individually. - * * @throws IOException If the first byte cannot be read for any reason other * than the end of the file, if the input stream has been * closed, or if some other I/O error occurs. @@ -460,12 +445,15 @@ private long doSkipItems() throws IOException { @Override public long readArrayStart() throws IOException { - return doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(doReadItemCount()); + return collectionCount; } @Override public long arrayNext() throws IOException { - return doReadItemCount(); + long length = doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(collectionCount, length); + return length; } @Override @@ -475,12 +463,15 @@ public long skipArray() throws IOException { @Override public long readMapStart() throws IOException { - return doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(doReadItemCount()); + return collectionCount; } @Override public long mapNext() throws IOException { - return doReadItemCount(); + long length = doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(collectionCount, length); + return length; } @Override @@ -521,6 +512,21 @@ public boolean isEnd() throws IOException { return (0 == read); } + /** + * Returns the total number of bytes remaining that can be read from this + * decoder (including any buffered bytes), or {@code -1} if the total is + * unknown. + *

    + * Byte-array-backed decoders return an exact count. InputStream-backed decoders + * return an exact count only when the wrapped stream can report one. + *

    + * {@link DirectBinaryDecoder} always returns {@code -1}. + */ + @Override + public int remainingBytes() { + return source != null ? source.remainingBytes() : -1; + } + /** * Ensures that buf[pos + num - 1] is not out of the buffer array bounds. * However, buf[pos + num -1] may be >= limit if there is not enough data left @@ -543,6 +549,27 @@ private void ensureBounds(int num) throws IOException { } } + /** + * Validates that the source has at least {@code length} bytes remaining before + * proceeding. Throws early if the declared length is inconsistent with the + * available data. + *

    + * This check is only applied when the decoder knows the exact remaining byte + * count. + * + * @param length the number of bytes expected to be available + * @throws EOFException if the source is known to have fewer bytes remaining + */ + private void ensureAvailableBytes(int length) throws EOFException { + if (source != null && length > 0) { + int remaining = source.remainingBytes(); + if (remaining >= 0 && length > remaining) { + throw new EOFException( + "Attempted to read " + length + " bytes, but only " + remaining + " bytes are available"); + } + } + } + /** * Returns an {@link java.io.InputStream} that is aware of any buffering that * may occur in this BinaryDecoder. Readers that need to interleave decoding @@ -556,7 +583,7 @@ public InputStream inputStream() { /** * BufferAccessor is used by BinaryEncoder to enable {@link ByteSource}s and the - * InputStream returned by {@link BinaryDecoder.inputStream} to access the + * InputStream returned by {@link BinaryDecoder#inputStream} to access the * BinaryEncoder's buffer. When a BufferAccessor is created, it is attached to a * BinaryDecoder and its buffer. Its accessors directly reference the * BinaryDecoder's buffer. When detach() is called, it stores references to the @@ -649,15 +676,15 @@ void setBuf(byte[] buf, int offset, int length) { * stronger guarantees than InputStream, freeing client code to be simplified * and faster. *

    - * {@link skipSourceBytes} and {@link readRaw} are guaranteed to have read or + * {@link #skipSourceBytes} and {@link #readRaw} are guaranteed to have read or * skipped as many bytes as possible, or throw EOFException. - * {@link trySkipBytes} and {@link tryRead} are guaranteed to attempt to read or - * skip as many bytes as possible and never throw EOFException, while returning - * the exact number of bytes skipped or read. {@link isEof} returns true if all - * the source bytes have been read or skipped. This condition can also be - * detected by a client if an EOFException is thrown from - * {@link skipSourceBytes} or {@link readRaw}, or if {@link trySkipBytes} or - * {@link tryRead} return 0; + * {@link #trySkipBytes} and {@link #tryReadRaw} are guaranteed to attempt to + * read or skip as many bytes as possible and never throw EOFException, while + * returning the exact number of bytes skipped or read. {@link #isEof} returns + * true if all the source bytes have been read or skipped. This condition can + * also be detected by a client if an EOFException is thrown from + * {@link #skipSourceBytes} or {@link #readRaw}, or if {@link #trySkipBytes} or + * {@link #tryReadRaw} return 0; *

    * A ByteSource also implements the InputStream contract for use by APIs that * require it. The InputStream interface must take into account buffering in any @@ -677,6 +704,12 @@ protected ByteSource() { abstract boolean isEof(); + /** + * Returns the total number of bytes remaining that can be read from this source + * (including any buffered bytes), or {@code -1} if the total is unknown. + */ + protected abstract int remainingBytes(); + protected void attach(int bufferSize, BinaryDecoder decoder) { decoder.buf = new byte[bufferSize]; decoder.pos = 0; @@ -805,7 +838,7 @@ public int available() throws IOException { } private static class InputStreamByteSource extends ByteSource { - private InputStream in; + private final InputStream in; protected boolean isEof = false; private InputStreamByteSource(InputStream in) { @@ -923,6 +956,20 @@ public boolean isEof() { return isEof; } + @Override + protected int remainingBytes() { + int buffered = ba.getLim() - ba.getPos(); + try { + if (in.getClass() == ByteArrayInputStream.class || in.getClass() == ByteBufferInputStream.class) { + long total = (long) buffered + in.available(); + return (int) Math.min(total, Integer.MAX_VALUE); + } + } catch (IOException e) { + return -1; + } + return -1; + } + @Override public void close() throws IOException { in.close(); @@ -932,11 +979,10 @@ public void close() throws IOException { /** * This byte source is special. It will avoid copying data by using the source's * byte[] as a buffer in the decoder. - * */ private static class ByteArrayByteSource extends ByteSource { private static final int MIN_SIZE = 16; - private byte[] data; + private final byte[] data; private int position; private int max; private boolean compacted = false; @@ -976,7 +1022,7 @@ protected void skipSourceBytes(long length) throws IOException { } @Override - protected long trySkipBytes(long length) throws IOException { + protected long trySkipBytes(long length) { // the buffer is shared, so this should return 0 max = ba.getLim(); position = ba.getPos(); @@ -1001,13 +1047,13 @@ protected void readRaw(byte[] data, int off, int len) throws IOException { } @Override - protected int tryReadRaw(byte[] data, int off, int len) throws IOException { + protected int tryReadRaw(byte[] data, int off, int len) { // the buffer is shared, nothing to read return 0; } @Override - protected void compactAndFill(byte[] buf, int pos, int minPos, int remaining) throws IOException { + protected void compactAndFill(byte[] buf, int pos, int minPos, int remaining) { // this implementation does not want to mutate the array passed in, // so it makes a new tiny buffer unless it has been compacted once before if (!compacted) { @@ -1042,5 +1088,10 @@ public boolean isEof() { int remaining = ba.getLim() - ba.getPos(); return (remaining == 0); } + + @Override + protected int remainingBytes() { + return ba.getLim() - ba.getPos(); + } } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryEncoder.java index 22d0326165c..97d9895c74f 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryEncoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryEncoder.java @@ -37,6 +37,13 @@ */ public abstract class BinaryEncoder extends Encoder { + /* + * Buffer used for writing ASCII strings. A string is encoded as a long followed + * by that many bytes of character data. A string of length 63 is the upper + * limit for a 1 byte variable-length long value. + */ + private final byte[] stringBuffer = new byte[63]; + @Override public void writeNull() throws IOException { } @@ -48,10 +55,47 @@ public void writeString(Utf8 utf8) throws IOException { @Override public void writeString(String string) throws IOException { - if (0 == string.length()) { + /* empty string short-circuit */ + if (string.isEmpty()) { writeZero(); return; } + + /* + * Assume the String is ASCII. If the ASCII String fits into the existing + * buffer, copy the characters into the buffer and write it to the underlying + * Encoder. If the String is too long, or ends up not being ASCII, then + * fall-back to the default JDK mechanism for handling String to byte array. + */ + final int stringLength = string.length(); + if (stringLength <= stringBuffer.length) { + boolean onlyAscii = true; + for (int i = 0; onlyAscii && (i < stringLength); i++) { + /* + * The char data type is a single 16-bit Unicode character (UTF-16). ASCII, is a + * 7-bit character encoding. Therefore, if the value is larger than 127, it + * cannot be ASCII. If it is ASCII, it is safe to trim to byte. + */ + final char c = string.charAt(i); + if (c >= 0x80) { + onlyAscii = false; + } else { + stringBuffer[i] = (byte) c; + } + } + if (onlyAscii) { + writeInt(stringLength); + writeFixed(stringBuffer, 0, stringLength); + return; + } + } + + /* + * The standard JDK way of turning Strings into byte arrays. Handles UTF-16 + * case. However, for ASCII this has the overhead of instantiating a new byte + * array (which pollutes the heap), and then copying the underlying bytes into + * the array, + */ byte[] bytes = string.getBytes(StandardCharsets.UTF_8); writeInt(bytes.length); writeFixed(bytes, 0, bytes.length); diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BlockingBinaryEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BlockingBinaryEncoder.java index d0bfc8f075e..9a0d9e414b0 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/BlockingBinaryEncoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BlockingBinaryEncoder.java @@ -89,7 +89,7 @@ public enum State { * this case, {@link BlockedValue#start} is zero. The header for such a block * has _already been written_ (we've written out a header indicating that the * block has a single item, and we put a "zero" down for the byte-count to - * indicate that we don't know the physical length of the buffer. Any blocks + * indicate that we don't know the physical length of the buffer). Any blocks * _containing_ this block must be in the {@link #OVERFLOW} state. */ OVERFLOW @@ -130,7 +130,7 @@ public BlockedValue() { * Check invariants of this and also the BlockedValue * containing this. */ - public boolean check(BlockedValue prev, int pos) { + public void check(BlockedValue prev, int pos) { assert state != State.ROOT || type == null; assert (state == State.ROOT || type == Schema.Type.ARRAY || type == Schema.Type.MAP); @@ -156,7 +156,6 @@ public boolean check(BlockedValue prev, int pos) { assert prev.state == State.ROOT || prev.state == State.OVERFLOW; break; } - return false; } } @@ -179,7 +178,7 @@ public boolean check(BlockedValue prev, int pos) { // buffer large enough for up to two ints for a block header // rounded up to a multiple of 4 bytes. - private byte[] headerBuffer = new byte[12]; + private final byte[] headerBuffer = new byte[12]; private boolean check() { assert buf != null; @@ -438,7 +437,7 @@ private void endBlockedValue() throws IOException { * Called when we've finished writing the last item in an overflow buffer. When * this is finished, the top of the stack will be an empty block in the * "regular" state. - * + * * @throws IOException */ private void finishOverflow() throws IOException { diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BlockingDirectBinaryEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BlockingDirectBinaryEncoder.java new file mode 100644 index 00000000000..9f391a31921 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BlockingDirectBinaryEncoder.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.io; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayDeque; +import java.util.ArrayList; + +/** + * An {@link Encoder} for Avro's binary encoding that does not buffer output. + *

    + * This encoder does not buffer writes in contrast to + * {@link BufferedBinaryEncoder}. However, it is lighter-weight and useful when: + * The buffering in BufferedBinaryEncoder is not desired because you buffer a + * different level or the Encoder is very short-lived. + *

    + * The BlockingDirectBinaryEncoder will encode the number of bytes of the Map + * and Array blocks. This will allow to postpone the decoding, or skip over it + * at all. + *

    + * To construct, use + * {@link EncoderFactory#blockingDirectBinaryEncoder(OutputStream, BinaryEncoder)} + *

    + * {@link BlockingDirectBinaryEncoder} instances returned by this method are not + * thread-safe + * + * @see BinaryEncoder + * @see EncoderFactory + * @see Encoder + * @see Decoder + */ +public class BlockingDirectBinaryEncoder extends DirectBinaryEncoder { + private final ArrayList buffers; + + private final ArrayDeque stashedBuffers; + + private int depth = 0; + + private final ArrayDeque blockItemCounts; + + /** + * Create a writer that sends its output to the underlying stream + * out. + * + * @param out The OutputStream to write to + */ + public BlockingDirectBinaryEncoder(OutputStream out) { + super(out); + this.buffers = new ArrayList<>(); + this.stashedBuffers = new ArrayDeque<>(); + this.blockItemCounts = new ArrayDeque<>(); + } + + private void startBlock() { + stashedBuffers.push(out); + if (this.buffers.size() <= depth) { + this.buffers.add(new BufferOutputStream()); + } + BufferOutputStream buf = buffers.get(depth); + buf.reset(); + this.depth += 1; + this.out = buf; + } + + private void endBlock() { + if (depth == 0) { + throw new RuntimeException("Called endBlock, while not buffering a block"); + } + this.depth -= 1; + out = stashedBuffers.pop(); + BufferOutputStream buffer = this.buffers.get(depth); + long blockItemCount = blockItemCounts.pop(); + if (blockItemCount > 0) { + try { + // Make it negative, so the reader knows that the number of bytes is coming + writeLong(-blockItemCount); + writeLong(buffer.size()); + writeFixed(buffer.toBufferWithoutCopy()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + @Override + public void setItemCount(long itemCount) throws IOException { + blockItemCounts.push(itemCount); + } + + @Override + public void writeArrayStart() throws IOException { + startBlock(); + } + + @Override + public void writeArrayEnd() throws IOException { + endBlock(); + // Writes another zero to indicate that this is the last block + super.writeArrayEnd(); + } + + @Override + public void writeMapStart() throws IOException { + startBlock(); + } + + @Override + public void writeMapEnd() throws IOException { + endBlock(); + // Writes another zero to indicate that this is the last block + super.writeMapEnd(); + } + + private static class BufferOutputStream extends ByteArrayOutputStream { + BufferOutputStream() { + } + + ByteBuffer toBufferWithoutCopy() { + return ByteBuffer.wrap(buf, 0, count); + } + + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BufferedBinaryEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BufferedBinaryEncoder.java index 376289ec882..b0dfa4faf67 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/BufferedBinaryEncoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BufferedBinaryEncoder.java @@ -41,7 +41,7 @@ *

    * To change the buffer size, configure the factory instance used to create * instances with {@link EncoderFactory#configureBufferSize(int)} - * + * * @see Encoder * @see EncoderFactory * @see BlockingBinaryEncoder @@ -105,8 +105,8 @@ private void flushBuffer() throws IOException { * current position and the end. This will not expand the buffer larger than its * current size, for writes larger than or near to the size of the buffer, we * flush the buffer and write directly to the output, bypassing the buffer. - * - * @param num + * + * @param num the number of bytes to ensure are available * @throws IOException */ private void ensureBounds(int num) throws IOException { @@ -175,14 +175,11 @@ public void writeFixed(ByteBuffer bytes) throws IOException { @Override protected void writeZero() throws IOException { - writeByte(0); - } - - private void writeByte(int b) throws IOException { + // inlined, shorter version of writeZero if (pos == buf.length) { flushBuffer(); } - buf[pos++] = (byte) (b & 0xFF); + buf[pos++] = (byte) 0; } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/DatumWriter.java b/lang/java/avro/src/main/java/org/apache/avro/io/DatumWriter.java index f1e57626912..050c25b467f 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/DatumWriter.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/DatumWriter.java @@ -17,6 +17,7 @@ */ package org.apache.avro.io; +import java.io.ByteArrayOutputStream; import java.io.IOException; import org.apache.avro.Schema; @@ -36,4 +37,19 @@ public interface DatumWriter { * the schema from the datum to the output. */ void write(D datum, Encoder out) throws IOException; + + /** + * Convenience method to Write a datum to a byte array. Traverse the schema, + * depth first, writing each leaf value in the schema from the datum to the byte + * array. + * + * @param datum The datum to serialize + * @return The serialized datum stored in an array of bytes + */ + default byte[] toByteArray(D datum) throws IOException { + try (ByteArrayOutputStream out = new ByteArrayOutputStream(128)) { + write(datum, EncoderFactory.get().directBinaryEncoder(out, null)); + return out.toByteArray(); + } + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/Decoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/Decoder.java index a0f4049f023..80640a61aa0 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/Decoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/Decoder.java @@ -30,11 +30,11 @@ *

    * The other type of methods support the reading of maps and arrays. These * methods are {@link #readArrayStart}, {@link #arrayNext}, and similar methods - * for maps). See {@link #readArrayStart} for details on these methods.) + * for maps. See {@link #readArrayStart} for details on these methods. *

    * {@link DecoderFactory} contains Decoder construction and configuration * facilities. - * + * * @see DecoderFactory * @see Encoder */ @@ -44,7 +44,7 @@ public abstract class Decoder { /** * "Reads" a null value. (Doesn't actually read anything, but advances the state * of the parser if the implementation is stateful.) - * + * * @throws AvroTypeException If this is a stateful reader and null is not the * type of the next value to be read */ @@ -52,7 +52,7 @@ public abstract class Decoder { /** * Reads a boolean value written by {@link Encoder#writeBoolean}. - * + * * @throws AvroTypeException If this is a stateful reader and boolean is not the * type of the next value to be read */ @@ -61,7 +61,7 @@ public abstract class Decoder { /** * Reads an integer written by {@link Encoder#writeInt}. - * + * * @throws AvroTypeException If encoded value is larger than 32-bits * @throws AvroTypeException If this is a stateful reader and int is not the * type of the next value to be read @@ -70,7 +70,7 @@ public abstract class Decoder { /** * Reads a long written by {@link Encoder#writeLong}. - * + * * @throws AvroTypeException If this is a stateful reader and long is not the * type of the next value to be read */ @@ -78,7 +78,7 @@ public abstract class Decoder { /** * Reads a float written by {@link Encoder#writeFloat}. - * + * * @throws AvroTypeException If this is a stateful reader and is not the type of * the next value to be read */ @@ -86,7 +86,7 @@ public abstract class Decoder { /** * Reads a double written by {@link Encoder#writeDouble}. - * + * * @throws AvroTypeException If this is a stateful reader and is not the type of * the next value to be read */ @@ -94,7 +94,7 @@ public abstract class Decoder { /** * Reads a char-string written by {@link Encoder#writeString}. - * + * * @throws AvroTypeException If this is a stateful reader and char-string is not * the type of the next value to be read */ @@ -102,7 +102,7 @@ public abstract class Decoder { /** * Reads a char-string written by {@link Encoder#writeString}. - * + * * @throws AvroTypeException If this is a stateful reader and char-string is not * the type of the next value to be read */ @@ -110,7 +110,7 @@ public abstract class Decoder { /** * Discards a char-string written by {@link Encoder#writeString}. - * + * * @throws AvroTypeException If this is a stateful reader and char-string is not * the type of the next value to be read */ @@ -120,7 +120,7 @@ public abstract class Decoder { * Reads a byte-string written by {@link Encoder#writeBytes}. if old is * not null and has sufficient capacity to take in the bytes being read, the * bytes are returned in old. - * + * * @throws AvroTypeException If this is a stateful reader and byte-string is not * the type of the next value to be read */ @@ -128,7 +128,7 @@ public abstract class Decoder { /** * Discards a byte-string written by {@link Encoder#writeBytes}. - * + * * @throws AvroTypeException If this is a stateful reader and byte-string is not * the type of the next value to be read */ @@ -136,7 +136,7 @@ public abstract class Decoder { /** * Reads fixed sized binary object. - * + * * @param bytes The buffer to store the contents being read. * @param start The position where the data needs to be written. * @param length The size of the binary object. @@ -149,7 +149,7 @@ public abstract class Decoder { /** * A shorthand for readFixed(bytes, 0, bytes.length). - * + * * @throws AvroTypeException If this is a stateful reader and fixed sized binary * object is not the type of the next value to be read * or the length is incorrect. @@ -161,7 +161,7 @@ public void readFixed(byte[] bytes) throws IOException { /** * Discards fixed sized binary object. - * + * * @param length The size of the binary object to be skipped. * @throws AvroTypeException If this is a stateful reader and fixed sized binary * object is not the type of the next value to be read @@ -172,7 +172,7 @@ public void readFixed(byte[] bytes) throws IOException { /** * Reads an enumeration. - * + * * @return The enumeration's value. * @throws AvroTypeException If this is a stateful reader and enumeration is not * the type of the next value to be read. @@ -185,7 +185,7 @@ public void readFixed(byte[] bytes) throws IOException { * returns non-zero, then the caller should read the indicated number of items, * and then call {@link #arrayNext} to find out the number of items in the next * block. The typical pattern for consuming an array looks like: - * + * *

        *   for(long i = in.readArrayStart(); i != 0; i = in.arrayNext()) {
        *     for (long j = 0; j < i; j++) {
    @@ -193,7 +193,7 @@ public void readFixed(byte[] bytes) throws IOException {
        *     }
        *   }
        * 
    - * + * * @throws AvroTypeException If this is a stateful reader and array is not the * type of the next value to be read */ @@ -201,9 +201,9 @@ public void readFixed(byte[] bytes) throws IOException { /** * Processes the next block of an array and returns the number of items in the - * block and let's the caller read those items. - * - * @throws AvroTypeException When called outside of an array context + * block and lets the caller read those items. + * + * @throws AvroTypeException When called outside an array context */ public abstract long arrayNext() throws IOException; @@ -216,7 +216,7 @@ public void readFixed(byte[] bytes) throws IOException { * possible. It will return zero if there are no more items to skip through, or * an item count if it needs the client's help in skipping. The typical usage * pattern is: - * + * *
        *   for(long i = in.skipArray(); i != 0; i = i.skipArray()) {
        *     for (long j = 0; j < i; j++) {
    @@ -224,7 +224,7 @@ public void readFixed(byte[] bytes) throws IOException {
        *     }
        *   }
        * 
    - * + * * Note that this method can automatically skip through items if a byte-count is * found in the underlying data, or if a schema has been provided to the * implementation, but otherwise the client will have to skip through items @@ -240,9 +240,9 @@ public void readFixed(byte[] bytes) throws IOException { * {@link #readArrayStart}. * * As an example, let's say you want to read a map of records, the record - * consisting of an Long field and a Boolean field. Your code would look + * consisting of a Long field and a Boolean field. Your code would look * something like this: - * + * *
        * Map m = new HashMap();
        * Record reuse = new Record();
    @@ -255,7 +255,7 @@ public void readFixed(byte[] bytes) throws IOException {
        *   }
        * }
        * 
    - * + * * @throws AvroTypeException If this is a stateful reader and map is not the * type of the next value to be read */ @@ -264,8 +264,8 @@ public void readFixed(byte[] bytes) throws IOException { /** * Processes the next block of map entries and returns the count of them. * Similar to {@link #arrayNext}. See {@link #readMapStart} for details. - * - * @throws AvroTypeException When called outside of a map context + * + * @throws AvroTypeException When called outside a map context */ public abstract long mapNext() throws IOException; @@ -273,9 +273,9 @@ public void readFixed(byte[] bytes) throws IOException { * Support for quickly skipping through a map similar to {@link #skipArray}. * * As an example, let's say you want to skip a map of records, the record - * consisting of an Long field and a Boolean field. Your code would look + * consisting of a Long field and a Boolean field. Your code would look * something like this: - * + * *
        * for (long i = in.skipMap(); i != 0; i = in.skipMap()) {
        *   for (long j = 0; j < i; j++) {
    @@ -285,7 +285,7 @@ public void readFixed(byte[] bytes) throws IOException {
        *   }
        * }
        * 
    - * + * * @throws AvroTypeException If this is a stateful reader and array is not the * type of the next value to be read */ @@ -294,9 +294,19 @@ public void readFixed(byte[] bytes) throws IOException { /** * Reads the tag of a union written by {@link Encoder#writeIndex}. - * + * * @throws AvroTypeException If this is a stateful reader and union is not the * type of the next value to be read */ public abstract int readIndex() throws IOException; + + /** + * Returns the total number of bytes remaining that can be read from this + * decoder, or {@code -1} if the total is unknown. Implementations that can + * determine remaining capacity (for example, byte-array-backed decoders) should + * override this method. The default returns {@code -1}. + */ + public int remainingBytes() { + return -1; + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/DecoderFactory.java b/lang/java/avro/src/main/java/org/apache/avro/io/DecoderFactory.java index 255e56dfd26..1f9e05dad5e 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/DecoderFactory.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/DecoderFactory.java @@ -24,10 +24,14 @@ /** * A factory for creating and configuring {@link Decoder}s. - *

    + *

    * Factories are thread-safe, and are generally cached by applications for * performance reasons. Multiple instances are only required if multiple * concurrent configurations are needed. + *

    + *

    + * Although Factories are thread-safe the {@link Decoder}s they return are not. + *

    * * @see Decoder */ @@ -70,7 +74,7 @@ public static DecoderFactory get() { * nearest value in the range. Values less than 512 or greater than * 1024*1024 are not recommended. * @return This factory, to enable method chaining: - * + * *
        *         DecoderFactory myFactory = new DecoderFactory().useBinaryDecoderBufferSize(4096);
        *         
    @@ -87,7 +91,7 @@ public DecoderFactory configureDecoderBufferSize(int size) { /** * Returns this factory's configured preferred buffer size. Used when creating * Decoder instances that buffer. See {@link #configureDecoderBufferSize} - * + * * @return The preferred buffer size, in bytes. */ public int getConfiguredBufferSize() { @@ -156,7 +160,7 @@ public BinaryDecoder binaryDecoder(InputStream in, BinaryDecoder reuse) { * In the case that the improved performance of a buffering implementation does * not outweigh the inconvenience of its buffering semantics, a "direct" decoder * can be used. - * + * * @param in The InputStream to initialize to * @param reuse The BinaryDecoder to attempt to reuse given the factory * configuration. A BinaryDecoder implementation may not be @@ -227,11 +231,11 @@ public BinaryDecoder createBinaryDecoder(byte[] bytes, BinaryDecoder reuse) { /** * This method is shorthand for - * + * *
        * createBinaryDecoder(bytes, 0, bytes.length, reuse);
        * 
    - * + * * {@link #binaryDecoder(byte[], int, int, BinaryDecoder)} */ public BinaryDecoder binaryDecoder(byte[] bytes, BinaryDecoder reuse) { diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java index 7b056556693..ac251550da2 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java @@ -20,10 +20,10 @@ import java.io.EOFException; import java.io.IOException; import java.io.InputStream; -import java.nio.Buffer; import java.nio.ByteBuffer; import org.apache.avro.InvalidNumberEncodingException; +import org.apache.avro.SystemLimitException; import org.apache.avro.util.ByteBufferInputStream; /** @@ -40,15 +40,15 @@ class DirectBinaryDecoder extends BinaryDecoder { private class ByteReader { public ByteBuffer read(ByteBuffer old, int length) throws IOException { - ByteBuffer result; + final ByteBuffer result; if (old != null && length <= old.capacity()) { result = old; - ((Buffer) result).clear(); + result.clear(); } else { result = ByteBuffer.allocate(length); } doReadBytes(result.array(), result.position(), length); - ((Buffer) result).limit(length); + result.limit(length); return result; } } @@ -68,7 +68,6 @@ public ByteBuffer read(ByteBuffer old, int length) throws IOException { return bbi.readBuffer(length); } } - } private ByteReader byteReader; @@ -156,8 +155,8 @@ public double readDouble() throws IOException { @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { - int length = readInt(); - return byteReader.read(old, length); + long length = readLong(); + return byteReader.read(old, SystemLimitException.checkMaxBytesLength(length)); } @Override @@ -191,7 +190,7 @@ public InputStream inputStream() { } @Override - public boolean isEnd() throws IOException { + public boolean isEnd() { throw new UnsupportedOperationException(); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryEncoder.java index 62b2a482627..df7c118b648 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryEncoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryEncoder.java @@ -27,20 +27,20 @@ * This encoder does not buffer writes, and as a result is slower than * {@link BufferedBinaryEncoder}. However, it is lighter-weight and useful when * the buffering in BufferedBinaryEncoder is not desired and/or the Encoder is - * very short lived. + * very short-lived. *

    * To construct, use * {@link EncoderFactory#directBinaryEncoder(OutputStream, BinaryEncoder)} *

    * DirectBinaryEncoder is not thread-safe - * + * * @see BinaryEncoder * @see EncoderFactory * @see Encoder * @see Decoder */ public class DirectBinaryEncoder extends BinaryEncoder { - private OutputStream out; + protected OutputStream out; // the buffer is used for writing floats, doubles, and large longs. private final byte[] buf = new byte[12]; @@ -48,7 +48,7 @@ public class DirectBinaryEncoder extends BinaryEncoder { * Create a writer that sends its output to the underlying stream * out. **/ - DirectBinaryEncoder(OutputStream out) { + protected DirectBinaryEncoder(OutputStream out) { configure(out); } @@ -69,8 +69,8 @@ public void writeBoolean(boolean b) throws IOException { } /* - * buffering is slower for ints that encode to just 1 or two bytes, and and - * faster for large ones. (Sun JRE 1.6u22, x64 -server) + * buffering is slower for ints that encode to just 1 or two bytes, and faster + * for large ones. (Sun JRE 1.6u22, x64 -server) */ @Override public void writeInt(int n) throws IOException { diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/Encoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/Encoder.java index db3e88b6c85..85d5c421fb6 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/Encoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/Encoder.java @@ -39,7 +39,7 @@ *

    * {@link EncoderFactory} contains Encoder construction and configuration * facilities. - * + * * @see EncoderFactory * @see Decoder */ @@ -48,7 +48,7 @@ public abstract class Encoder implements Flushable { /** * "Writes" a null value. (Doesn't actually write anything, but advances the * state of the parser if this class is stateful.) - * + * * @throws AvroTypeException If this is a stateful writer and a null is not * expected */ @@ -56,7 +56,7 @@ public abstract class Encoder implements Flushable { /** * Write a boolean value. - * + * * @throws AvroTypeException If this is a stateful writer and a boolean is not * expected */ @@ -64,7 +64,7 @@ public abstract class Encoder implements Flushable { /** * Writes a 32-bit integer. - * + * * @throws AvroTypeException If this is a stateful writer and an integer is not * expected */ @@ -72,7 +72,7 @@ public abstract class Encoder implements Flushable { /** * Write a 64-bit integer. - * + * * @throws AvroTypeException If this is a stateful writer and a long is not * expected */ @@ -80,7 +80,7 @@ public abstract class Encoder implements Flushable { /** * Write a float. - * + * * @throws IOException * @throws AvroTypeException If this is a stateful writer and a float is not * expected @@ -89,7 +89,7 @@ public abstract class Encoder implements Flushable { /** * Write a double. - * + * * @throws AvroTypeException If this is a stateful writer and a double is not * expected */ @@ -97,7 +97,7 @@ public abstract class Encoder implements Flushable { /** * Write a Unicode character string. - * + * * @throws AvroTypeException If this is a stateful writer and a char-string is * not expected */ @@ -107,7 +107,7 @@ public abstract class Encoder implements Flushable { * Write a Unicode character string. The default implementation converts the * String to a {@link org.apache.avro.util.Utf8}. Some Encoder implementations * may want to do something different as a performance optimization. - * + * * @throws AvroTypeException If this is a stateful writer and a char-string is * not expected */ @@ -119,7 +119,7 @@ public void writeString(String str) throws IOException { * Write a Unicode character string. If the CharSequence is an * {@link org.apache.avro.util.Utf8} it writes this directly, otherwise the * CharSequence is converted to a String via toString() and written. - * + * * @throws AvroTypeException If this is a stateful writer and a char-string is * not expected */ @@ -132,7 +132,7 @@ public void writeString(CharSequence charSequence) throws IOException { /** * Write a byte string. - * + * * @throws AvroTypeException If this is a stateful writer and a byte-string is * not expected */ @@ -140,7 +140,7 @@ public void writeString(CharSequence charSequence) throws IOException { /** * Write a byte string. - * + * * @throws AvroTypeException If this is a stateful writer and a byte-string is * not expected */ @@ -149,7 +149,7 @@ public void writeString(CharSequence charSequence) throws IOException { /** * Writes a byte string. Equivalent to * writeBytes(bytes, 0, bytes.length) - * + * * @throws IOException * @throws AvroTypeException If this is a stateful writer and a byte-string is * not expected @@ -160,7 +160,7 @@ public void writeBytes(byte[] bytes) throws IOException { /** * Writes a fixed size binary object. - * + * * @param bytes The contents to write * @param start The position within bytes where the contents start. * @param len The number of bytes to write. @@ -172,8 +172,8 @@ public void writeBytes(byte[] bytes) throws IOException { /** * A shorthand for writeFixed(bytes, 0, bytes.length) - * - * @param bytes + * + * @param bytes the data */ public void writeFixed(byte[] bytes) throws IOException { writeFixed(bytes, 0, bytes.length); @@ -194,8 +194,8 @@ public void writeFixed(ByteBuffer bytes) throws IOException { /** * Writes an enumeration. - * - * @param e + * + * @param e the enumeration to write * @throws AvroTypeException If this is a stateful writer and an enumeration is * not expected or the e is out of range. * @throws IOException @@ -214,9 +214,9 @@ public void writeFixed(ByteBuffer bytes) throws IOException { * the array have been written, call {@link #writeArrayEnd}. * * As an example, let's say you want to write an array of records, the record - * consisting of an Long field and a Boolean field. Your code would look + * consisting of a Long field and a Boolean field. Your code would look * something like this: - * + * *

        * out.writeArrayStart();
        * out.setItemCount(list.size());
    @@ -227,7 +227,7 @@ public void writeFixed(ByteBuffer bytes) throws IOException {
        * }
        * out.writeArrayEnd();
        * 
    - * + * * @throws AvroTypeException If this is a stateful writer and an array is not * expected */ @@ -248,8 +248,8 @@ public void writeFixed(ByteBuffer bytes) throws IOException { /** * Start a new item of an array or map. See {@link #writeArrayStart} for usage * information. - * - * @throws AvroTypeException If called outside of an array or map context + * + * @throws AvroTypeException If called outside an array or map context */ public abstract void startItem() throws IOException; @@ -268,9 +268,9 @@ public void writeFixed(ByteBuffer bytes) throws IOException { * usage. * * As an example of usage, let's say you want to write a map of records, the - * record consisting of an Long field and a Boolean field. Your code would look + * record consisting of a Long field and a Boolean field. Your code would look * something like this: - * + * *
        * out.writeMapStart();
        * out.setItemCount(list.size());
    @@ -282,7 +282,7 @@ public void writeFixed(ByteBuffer bytes) throws IOException {
        * }
        * out.writeMapEnd();
        * 
    - * + * * @throws AvroTypeException If this is a stateful writer and a map is not * expected */ @@ -302,15 +302,15 @@ public void writeFixed(ByteBuffer bytes) throws IOException { * Call this method to write the tag of a union. * * As an example of usage, let's say you want to write a union, whose second - * branch is a record consisting of an Long field and a Boolean field. Your code + * branch is a record consisting of a Long field and a Boolean field. Your code * would look something like this: - * + * *
        * out.writeIndex(1);
        * out.writeLong(record.longField);
        * out.writeBoolean(record.boolField);
        * 
    - * + * * @throws AvroTypeException If this is a stateful writer and a map is not * expected */ diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java b/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java index 0188a29637d..eaa83ba8baa 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.io.OutputStream; +import java.util.EnumSet; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; @@ -71,11 +72,11 @@ public static EncoderFactory get() { * likely to improve performance but may be useful for the * downstream OutputStream. * @return This factory, to enable method chaining: - * + * *
        *         EncoderFactory factory = new EncoderFactory().configureBufferSize(4096);
        *         
    - * + * * @see #binaryEncoder(OutputStream, BinaryEncoder) */ public EncoderFactory configureBufferSize(int size) { @@ -90,7 +91,7 @@ public EncoderFactory configureBufferSize(int size) { /** * Returns this factory's configured default buffer size. Used when creating * Encoder instances that buffer writes. - * + * * @see #configureBufferSize(int) * @see #binaryEncoder(OutputStream, BinaryEncoder) * @return The preferred buffer size, in bytes. @@ -109,11 +110,11 @@ public int getBufferSize() { * outside this range are set to the nearest value in the range. The * encoder will require at least this amount of memory. * @return This factory, to enable method chaining: - * + * *
        *         EncoderFactory factory = new EncoderFactory().configureBlockSize(8000);
        *         
    - * + * * @see #blockingBinaryEncoder(OutputStream, BinaryEncoder) */ public EncoderFactory configureBlockSize(int size) { @@ -131,7 +132,7 @@ public EncoderFactory configureBlockSize(int size) { * #blockingBinaryEncoder(OutputStream, BinaryEncoder) will have block buffers * of this size. *

    - * + * * @see #configureBlockSize(int) * @see #blockingBinaryEncoder(OutputStream, BinaryEncoder) * @return The preferred block size, in bytes. @@ -165,7 +166,6 @@ public int getBlockSize() { * reuse is null, this will be a new instance. If reuse is * not null, then the returned instance may be a new instance or * reuse reconfigured to use out. - * @throws IOException * @see BufferedBinaryEncoder * @see Encoder */ @@ -216,6 +216,49 @@ public BinaryEncoder directBinaryEncoder(OutputStream out, BinaryEncoder reuse) } } + /** + * Creates or reinitializes a {@link BlockingDirectBinaryEncoder} with the + * OutputStream provided as the destination for written data. If reuse is + * provided, an attempt will be made to reconfigure reuse rather than + * construct a new instance, but this is not guaranteed, a new instance may be + * returned. + *

    + * The {@link BinaryEncoder} implementation returned does not buffer its output, + * calling {@link Encoder#flush()} will simply cause the wrapped OutputStream to + * be flushed. + *

    + * The {@link BlockingDirectBinaryEncoder} will write the block sizes for the + * arrays and maps so efficient skipping can be done. + *

    + * Performance of unbuffered writes can be significantly slower than buffered + * writes. {@link #binaryEncoder(OutputStream, BinaryEncoder)} returns + * BinaryEncoder instances that are tuned for performance but may buffer output. + * The unbuffered, 'direct' encoder may be desired when buffering semantics are + * problematic, or if the lifetime of the encoder is so short that the buffer + * would not be useful. + *

    + * {@link BinaryEncoder} instances returned by this method are not thread-safe. + * + * @param out The OutputStream to initialize to. Cannot be null. + * @param reuse The BinaryEncoder to attempt to reuse given the factory + * configuration. A BinaryEncoder implementation may not be + * compatible with reuse, causing a new instance to be returned. If + * null, a new instance is returned. + * @return A BinaryEncoder that uses out as its data output. If + * reuse is null, this will be a new instance. If reuse is + * not null, then the returned instance may be a new instance or + * reuse reconfigured to use out. + * @see DirectBinaryEncoder + * @see Encoder + */ + public BinaryEncoder blockingDirectBinaryEncoder(OutputStream out, BinaryEncoder reuse) { + if (null == reuse || !reuse.getClass().equals(BlockingDirectBinaryEncoder.class)) { + return new BlockingDirectBinaryEncoder(out); + } else { + return ((DirectBinaryEncoder) reuse).configure(out); + } + } + /** * Creates or reinitializes a {@link BinaryEncoder} with the OutputStream * provided as the destination for written data. If reuse is provided, an @@ -243,7 +286,6 @@ public BinaryEncoder directBinaryEncoder(OutputStream out, BinaryEncoder reuse) * reuse is null, this will be a new instance. If reuse is * not null, then the returned instance may be a new instance or * reuse reconfigured to use out. - * @throws IOException * @see BlockingBinaryEncoder * @see Encoder */ @@ -297,6 +339,38 @@ public JsonEncoder jsonEncoder(Schema schema, OutputStream out, boolean pretty) return new JsonEncoder(schema, out, pretty); } + /** + * Creates a {@link JsonEncoder} using the OutputStream provided for writing + * data conforming to the Schema provided with optional pretty printing. + *

    + * {@link JsonEncoder} buffers its output. Data may not appear on the underlying + * OutputStream until {@link Encoder#flush()} is called. + *

    + * {@link JsonEncoder} is not thread-safe. + * + * @param schema The Schema for data written to this JsonEncoder. Cannot be + * null. + * @param out The OutputStream to write to. Cannot be null. + * @param pretty Pretty print encoding. + * @param autoflush Whether to Automatically flush the data to storage, default + * is true controls the underlying FLUSH_PASSED_TO_STREAM + * feature of JsonGenerator + * @return A JsonEncoder configured with out, schema and + * pretty + * @throws IOException + */ + public JsonEncoder jsonEncoder(Schema schema, OutputStream out, boolean pretty, boolean autoflush) + throws IOException { + EnumSet options = EnumSet.noneOf(JsonEncoder.JsonOptions.class); + if (pretty) { + options.add(JsonEncoder.JsonOptions.Pretty); + } + if (!autoflush) { + options.add(JsonEncoder.JsonOptions.NoFlushStream); + } + return new JsonEncoder(schema, out, options); + } + /** * Creates a {@link JsonEncoder} using the {@link JsonGenerator} provided for * output of data conforming to the Schema provided. @@ -327,7 +401,7 @@ JsonEncoder jsonEncoder(Schema schema, JsonGenerator gen) throws IOException { * {@link ValidatingEncoder} is not thread-safe. * * @param schema The Schema to validate operations against. Cannot be null. - * @param encoder The Encoder to wrap. Cannot be be null. + * @param encoder The Encoder to wrap. Cannot be null. * @return A ValidatingEncoder configured to wrap encoder and validate * against schema * @throws IOException diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/FastReaderBuilder.java b/lang/java/avro/src/main/java/org/apache/avro/io/FastReaderBuilder.java index f6e1ed5aae2..512c9ebf34f 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/FastReaderBuilder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/FastReaderBuilder.java @@ -52,6 +52,7 @@ import org.apache.avro.reflect.ReflectionUtil; import org.apache.avro.specific.SpecificData; import org.apache.avro.specific.SpecificRecordBase; +import org.apache.avro.util.ClassUtils; import org.apache.avro.util.Utf8; import org.apache.avro.util.WeakIdentityHashMap; import org.apache.avro.util.internal.Accessor; @@ -140,7 +141,7 @@ private RecordReader createRecordReader(RecordAdjust action) throws IOException return recordReader; } - private RecordReader initializeRecordReader(RecordReader recordReader, RecordAdjust action) throws IOException { + private void initializeRecordReader(RecordReader recordReader, RecordAdjust action) throws IOException { recordReader.startInitialization(); // generate supplier for the new object instances @@ -171,7 +172,6 @@ private RecordReader initializeRecordReader(RecordReader recordReader, RecordAdj } recordReader.finishInitialization(readSteps, action.reader, action.instanceSupplier); - return recordReader; } private ExecutionStep createFieldSetter(Field field, FieldReader reader) { @@ -197,7 +197,16 @@ private ExecutionStep getDefaultingStep(Schema.Field field) throws IOException { } else if (defaultValue instanceof Utf8) { return createFieldSetter(field, reusingReader((old, d) -> readUtf8(old, (Utf8) defaultValue))); } else if (defaultValue instanceof List && ((List) defaultValue).isEmpty()) { - return createFieldSetter(field, reusingReader((old, d) -> data.newArray(old, 0, field.schema()))); + Schema arraySchema = field.schema(); + if (arraySchema.getType() == Schema.Type.UNION) { + arraySchema = arraySchema.getTypes().stream() + .filter(nestedSchema -> nestedSchema.getType() == Schema.Type.ARRAY).findFirst() + .orElseThrow(() -> new AvroTypeException(String.format( + "Union schema %s has a default value of type Array, but none of the union types is of type Array", + field.schema().toString()))); + } + final Schema schema = arraySchema; + return createFieldSetter(field, reusingReader((old, d) -> data.newArray(old, 0, schema))); } else if (defaultValue instanceof Map && ((Map) defaultValue).isEmpty()) { return createFieldSetter(field, reusingReader((old, d) -> data.newMap(old, 0))); } else { @@ -277,7 +286,7 @@ private FieldReader getNonConvertedReader(Action action) throws IOException { throw new IllegalStateException("Error getting reader for action type " + action.getClass()); } case DO_NOTHING: - return getReaderForBaseType(action.reader, action.writer); + return getReaderForBaseType(action.reader); case RECORD: return createRecordReader((RecordAdjust) action); case ENUM: @@ -297,7 +306,7 @@ private FieldReader getNonConvertedReader(Action action) throws IOException { } } - private FieldReader getReaderForBaseType(Schema readerSchema, Schema writerSchema) throws IOException { + private FieldReader getReaderForBaseType(Schema readerSchema) { switch (readerSchema.getType()) { case NULL: return (old, decoder) -> { @@ -307,7 +316,7 @@ private FieldReader getReaderForBaseType(Schema readerSchema, Schema writerSchem case BOOLEAN: return (old, decoder) -> decoder.readBoolean(); case STRING: - return createStringReader(readerSchema, writerSchema); + return createStringReader(readerSchema); case INT: return (old, decoder) -> decoder.readInt(); case LONG: @@ -319,7 +328,7 @@ private FieldReader getReaderForBaseType(Schema readerSchema, Schema writerSchem case BYTES: return createBytesReader(); case FIXED: - return createFixedReader(readerSchema, writerSchema); + return createFixedReader(readerSchema); case RECORD: // covered by action type case UNION: // covered by action type case ENUM: // covered by action type @@ -330,7 +339,7 @@ private FieldReader getReaderForBaseType(Schema readerSchema, Schema writerSchem } } - private FieldReader createPromotingReader(Promote promote) throws IOException { + private FieldReader createPromotingReader(Promote promote) { switch (promote.reader.getType()) { case BYTES: return (reuse, decoder) -> ByteBuffer.wrap(decoder.readString(null).getBytes()); @@ -364,7 +373,7 @@ private FieldReader createPromotingReader(Promote promote) throws IOException { "No promotion possible for type " + promote.writer.getType() + " to " + promote.reader.getType()); } - private FieldReader createStringReader(Schema readerSchema, Schema writerSchema) { + private FieldReader createStringReader(Schema readerSchema) { FieldReader stringReader = createSimpleStringReader(readerSchema); if (isClassPropEnabled()) { return getTransformingStringReader(readerSchema.getProp(SpecificData.CLASS_PROP), stringReader); @@ -438,7 +447,11 @@ private FieldReader getTransformingStringReader(String valueClass, FieldReader s Function transformer = findClass(valueClass) .map(clazz -> ReflectionUtil.getConstructorAsFunction(String.class, clazz)).orElse(null); if (transformer != null) { - return (old, decoder) -> transformer.apply((String) stringReader.read(null, decoder)); + return (old, decoder) -> { + Object value = stringReader.read(null, decoder); + String stringValue = value instanceof Utf8 ? ((Utf8) value).toString() : (String) value; + return transformer.apply(stringValue); + }; } } @@ -447,8 +460,8 @@ private FieldReader getTransformingStringReader(String valueClass, FieldReader s private Optional> findClass(String clazz) { try { - return Optional.of(data.getClassLoader().loadClass(clazz)); - } catch (ReflectiveOperationException e) { + return Optional.of(ClassUtils.forName(data.getClassLoader(), clazz)); + } catch (ClassNotFoundException e) { return Optional.empty(); } } @@ -497,7 +510,7 @@ private FieldReader createEnumReader(EnumAdjust action) { }); } - private FieldReader createFixedReader(Schema readerSchema, Schema writerSchema) { + private FieldReader createFixedReader(Schema readerSchema) { return reusingReader((reuse, decoder) -> { GenericFixed fixed = (GenericFixed) data.createFixed(reuse, readerSchema); decoder.readFixed(fixed.bytes(), 0, readerSchema.getFixedSize()); @@ -516,9 +529,9 @@ public static FieldReader reusingReader(ReusingFieldReader reader) { public interface FieldReader extends DatumReader { @Override - public Object read(Object reuse, Decoder decoder) throws IOException; + Object read(Object reuse, Decoder decoder) throws IOException; - public default boolean canReuse() { + default boolean canReuse() { return false; } @@ -530,7 +543,7 @@ default void setSchema(Schema schema) { public interface ReusingFieldReader extends FieldReader { @Override - public default boolean canReuse() { + default boolean canReuse() { return true; } } @@ -608,7 +621,7 @@ public Object read(Object reuse, Decoder decoder) throws IOException { } public interface ExecutionStep { - public void execute(Object record, Decoder decoder) throws IOException; + void execute(Object record, Decoder decoder) throws IOException; } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java index c1c38511ab4..1876f87aaac 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java @@ -47,7 +47,7 @@ */ public class JsonDecoder extends ParsingDecoder implements Parser.ActionHandler { private JsonParser in; - private static JsonFactory jsonFactory = new JsonFactory(); + private static final JsonFactory JSON_FACTORY = new JsonFactory(); Stack reorderBuffers = new Stack<>(); ReorderBuffer currentReorderBuffer; @@ -86,7 +86,7 @@ private static Symbol getSymbol(Schema schema) { *

    * Otherwise, this JsonDecoder will reset its state and then reconfigure its * input. - * + * * @param in The InputStream to read from. Cannot be null. * @throws IOException * @throws NullPointerException if {@code in} is {@code null} @@ -97,7 +97,7 @@ public JsonDecoder configure(InputStream in) throws IOException { parser.reset(); reorderBuffers.clear(); currentReorderBuffer = null; - this.in = jsonFactory.createParser(in); + this.in = JSON_FACTORY.createParser(in); this.in.nextToken(); return this; } @@ -109,7 +109,7 @@ public JsonDecoder configure(InputStream in) throws IOException { *

    * Otherwise, this JsonDecoder will reset its state and then reconfigure its * input. - * + * * @param in The String to read from. Cannot be null. * @throws IOException * @throws NullPointerException if {@code in} is {@code null} @@ -157,25 +157,39 @@ public boolean readBoolean() throws IOException { @Override public int readInt() throws IOException { advance(Symbol.INT); - if (in.getCurrentToken().isNumeric()) { + if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_INT) { int result = in.getIntValue(); in.nextToken(); return result; - } else { - throw error("int"); } + if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_FLOAT) { + float value = in.getFloatValue(); + if (Math.abs(value - Math.round(value)) <= Float.MIN_VALUE) { + int result = Math.round(value); + in.nextToken(); + return result; + } + } + throw error("int"); } @Override public long readLong() throws IOException { advance(Symbol.LONG); - if (in.getCurrentToken().isNumeric()) { + if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_INT) { long result = in.getLongValue(); in.nextToken(); return result; - } else { - throw error("long"); } + if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_FLOAT) { + double value = in.getDoubleValue(); + if (Math.abs(value - Math.round(value)) <= Double.MIN_VALUE) { + long result = Math.round(value); + in.nextToken(); + return result; + } + } + throw error("long"); } @Override @@ -185,6 +199,19 @@ public float readFloat() throws IOException { float result = in.getFloatValue(); in.nextToken(); return result; + } else if (in.getCurrentToken() == JsonToken.VALUE_STRING) { + String stringValue = in.getText(); + in.nextToken(); + if (isNaNString(stringValue)) { + return Float.NaN; + } + if (isNegativeInfinityString(stringValue)) { + return Float.NEGATIVE_INFINITY; + } + if (isPositiveInfinityString(stringValue)) { + return Float.POSITIVE_INFINITY; + } + throw error("float"); } else { throw error("float"); } @@ -197,11 +224,42 @@ public double readDouble() throws IOException { double result = in.getDoubleValue(); in.nextToken(); return result; + } else if (in.getCurrentToken() == JsonToken.VALUE_STRING) { + String stringValue = in.getText(); + in.nextToken(); + if (isNaNString(stringValue)) { + return Double.NaN; + } + if (isNegativeInfinityString(stringValue)) { + return Double.NEGATIVE_INFINITY; + } + if (isPositiveInfinityString(stringValue)) { + return Double.POSITIVE_INFINITY; + } + throw error("double"); } else { throw error("double"); } } + // check whether the given string represents an IEEE 754 'NaN' string value as + // serialized by Jackson + private static boolean isNaNString(String value) { + return "NaN".equals(value); + } + + // check whether the given string represents an IEEE 754 'Infinity' string value + // as serialized by Jackson + private static boolean isPositiveInfinityString(String value) { + return "Infinity".equals(value) || "INF".equals(value); + } + + // check whether the given string represents an IEEE 754 '-Infinity' string + // value as serialized by Jackson + private static boolean isNegativeInfinityString(String value) { + return "-Infinity".equals(value) || "-INF".equals(value); + } + @Override public Utf8 readString(Utf8 old) throws IOException { return new Utf8(readString()); @@ -254,8 +312,7 @@ public ByteBuffer readBytes(ByteBuffer old) throws IOException { } private byte[] readByteArray() throws IOException { - byte[] result = in.getText().getBytes(StandardCharsets.ISO_8859_1); - return result; + return in.getText().getBytes(StandardCharsets.ISO_8859_1); } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java index 71cc690b8a4..4613f403d04 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java @@ -22,7 +22,9 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.BitSet; +import java.util.EnumSet; import java.util.Objects; +import java.util.Set; import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; @@ -33,6 +35,7 @@ import com.fasterxml.jackson.core.JsonEncoding; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.PrettyPrinter; import com.fasterxml.jackson.core.util.DefaultPrettyPrinter; import com.fasterxml.jackson.core.util.MinimalPrettyPrinter; @@ -47,7 +50,7 @@ * JsonEncoder is not thread-safe. */ public class JsonEncoder extends ParsingEncoder implements Parser.ActionHandler { - private static final String LINE_SEPARATOR = System.getProperty("line.separator"); + private static final String LINE_SEPARATOR = System.lineSeparator(); final Parser parser; private JsonGenerator out; private boolean includeNamespace = true; @@ -58,11 +61,15 @@ public class JsonEncoder extends ParsingEncoder implements Parser.ActionHandler protected BitSet isEmpty = new BitSet(); JsonEncoder(Schema sc, OutputStream out) throws IOException { - this(sc, getJsonGenerator(out, false)); + this(sc, getJsonGenerator(out, EnumSet.noneOf(JsonOptions.class))); } JsonEncoder(Schema sc, OutputStream out, boolean pretty) throws IOException { - this(sc, getJsonGenerator(out, pretty)); + this(sc, getJsonGenerator(out, pretty ? EnumSet.of(JsonOptions.Pretty) : EnumSet.noneOf(JsonOptions.class))); + } + + JsonEncoder(Schema sc, OutputStream out, Set options) throws IOException { + this(sc, getJsonGenerator(out, options)); } JsonEncoder(Schema sc, JsonGenerator out) throws IOException { @@ -78,24 +85,28 @@ public void flush() throws IOException { } } + enum JsonOptions { + Pretty, + + // Prevent underlying OutputStream to be flush for optimisation purpose. + NoFlushStream + } + // by default, one object per line. // with pretty option use default pretty printer with root line separator. - private static JsonGenerator getJsonGenerator(OutputStream out, boolean pretty) throws IOException { + private static JsonGenerator getJsonGenerator(OutputStream out, Set options) throws IOException { Objects.requireNonNull(out, "OutputStream cannot be null"); JsonGenerator g = new JsonFactory().createGenerator(out, JsonEncoding.UTF8); - if (pretty) { - DefaultPrettyPrinter pp = new DefaultPrettyPrinter() { - @Override - public void writeRootValueSeparator(JsonGenerator jg) throws IOException { - jg.writeRaw(LINE_SEPARATOR); - } - }; - g.setPrettyPrinter(pp); + if (options.contains(JsonOptions.NoFlushStream)) { + g.configure(JsonGenerator.Feature.FLUSH_PASSED_TO_STREAM, false); + } + final PrettyPrinter pp; + if (options.contains(JsonOptions.Pretty)) { + pp = new DefaultPrettyPrinter(LINE_SEPARATOR); } else { - MinimalPrettyPrinter pp = new MinimalPrettyPrinter(); - pp.setRootValueSeparator(LINE_SEPARATOR); - g.setPrettyPrinter(pp); + pp = new MinimalPrettyPrinter(LINE_SEPARATOR); } + g.setPrettyPrinter(pp); return g; } @@ -122,7 +133,29 @@ public void setIncludeNamespace(final boolean includeNamespace) { * @return this JsonEncoder */ public JsonEncoder configure(OutputStream out) throws IOException { - this.configure(getJsonGenerator(out, false)); + return this.configure(out, true); + } + + /** + * Reconfigures this JsonEncoder to use the output stream provided. + *

    + * If the OutputStream provided is null, a NullPointerException is thrown. + *

    + * Otherwise, this JsonEncoder will flush its current output and then + * reconfigure its output to use a default UTF8 JsonGenerator that writes to the + * provided OutputStream. + * + * @param out The OutputStream to direct output to. Cannot be null. + * @throws IOException + * @throws NullPointerException if {@code out} is {@code null} + * @return this JsonEncoder + */ + public JsonEncoder configure(OutputStream out, boolean autoflush) throws IOException { + EnumSet jsonOptions = EnumSet.noneOf(JsonOptions.class); + if (!autoflush) { + jsonOptions.add(JsonOptions.NoFlushStream); + } + this.configure(getJsonGenerator(out, jsonOptions)); return this; } @@ -137,15 +170,13 @@ public JsonEncoder configure(OutputStream out) throws IOException { * @param generator The JsonGenerator to direct output to. Cannot be null. * @throws IOException * @throws NullPointerException if {@code generator} is {@code null} - * @return this JsonEncoder */ - private JsonEncoder configure(JsonGenerator generator) throws IOException { + private void configure(JsonGenerator generator) throws IOException { Objects.requireNonNull(generator, "JsonGenerator cannot be null"); if (null != parser) { flush(); } this.out = generator; - return this; } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/ResolvingDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/ResolvingDecoder.java index 6f119a39b65..6bdb16a332c 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/ResolvingDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/ResolvingDecoder.java @@ -140,7 +140,7 @@ public final Schema.Field[] readFieldOrderIfDiff() throws IOException { /** * Consume any more data that has been written by the writer but not needed by - * the reader so that the the underlying decoder is in proper shape for the next + * the reader so that the underlying decoder is in proper shape for the next * record. This situation happens when, for example, the writer writes a record * with two fields and the reader needs only the first field. * @@ -187,11 +187,11 @@ public float readFloat() throws IOException { public double readDouble() throws IOException { Symbol actual = parser.advance(Symbol.DOUBLE); if (actual == Symbol.INT) { - return (double) in.readInt(); + return in.readInt(); } else if (actual == Symbol.LONG) { return (double) in.readLong(); } else if (actual == Symbol.FLOAT) { - return (double) in.readFloat(); + return in.readFloat(); } else { assert actual == Symbol.DOUBLE; return in.readDouble(); diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/ValidatingDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/ValidatingDecoder.java index dbee4458575..26f79a16ff2 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/ValidatingDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/ValidatingDecoder.java @@ -246,4 +246,9 @@ public int readIndex() throws IOException { public Symbol doAction(Symbol input, Symbol top) throws IOException { return null; } + + @Override + public int remainingBytes() { + return in != null ? in.remainingBytes() : -1; + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/ValidatingEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/ValidatingEncoder.java index d7440c7406e..d61967751a0 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/ValidatingEncoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/ValidatingEncoder.java @@ -36,7 +36,7 @@ * and configure. *

    * ValidatingEncoder is not thread-safe. - * + * * @see Encoder * @see EncoderFactory */ @@ -44,12 +44,12 @@ public class ValidatingEncoder extends ParsingEncoder implements Parser.ActionHa protected Encoder out; protected final Parser parser; - ValidatingEncoder(Symbol root, Encoder out) throws IOException { + ValidatingEncoder(Symbol root, Encoder out) { this.out = out; this.parser = new Parser(root, this); } - ValidatingEncoder(Schema schema, Encoder in) throws IOException { + ValidatingEncoder(Schema schema, Encoder in) { this(new ValidatingGrammarGenerator().generate(schema), in); } @@ -60,7 +60,7 @@ public void flush() throws IOException { /** * Reconfigures this ValidatingEncoder to wrap the encoder provided. - * + * * @param encoder The Encoder to wrap for validation. * @return This ValidatingEncoder. */ diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Parser.java b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Parser.java index 12fc4044a9c..89269578d2c 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Parser.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Parser.java @@ -139,7 +139,7 @@ public final void processTrailingImplicitActions() throws IOException { * repeater and input is either {@link Symbol#ARRAY_END} or * {@link Symbol#MAP_END} pushes nothing. * - * @param sym + * @param sym the symbol */ public final void pushProduction(Symbol sym) { Symbol[] p = sym.production; diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java index 77fbe1c7ad0..77acbd95241 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java @@ -223,7 +223,7 @@ private Symbol simpleGen(Schema s, Map seen) { } } - private static EncoderFactory factory = new EncoderFactory().configureBufferSize(32); + private final static EncoderFactory ENCODER_FACTORY = new EncoderFactory().configureBufferSize(32); /** * Returns the Avro binary encoded version of n according to the schema @@ -236,7 +236,7 @@ private Symbol simpleGen(Schema s, Map seen) { */ private static byte[] getBinary(Schema s, JsonNode n) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); - Encoder e = factory.binaryEncoder(out, null); + Encoder e = ENCODER_FACTORY.binaryEncoder(out, null); encode(e, s, n); e.flush(); return out.toByteArray(); @@ -292,8 +292,16 @@ public static void encode(Encoder e, Schema s, JsonNode n) throws IOException { e.writeMapEnd(); break; case UNION: - e.writeIndex(0); - encode(e, s.getTypes().get(0), n); + int correctIndex = 0; + List innerTypes = s.getTypes(); + while (correctIndex < innerTypes.size() && !isCompatible(innerTypes.get(correctIndex).getType(), n)) { + correctIndex++; + } + if (correctIndex >= innerTypes.size()) { + throw new AvroTypeException("Not compatible default value for union: " + n); + } + e.writeIndex(correctIndex); + encode(e, innerTypes.get(correctIndex), n); break; case FIXED: if (!n.isTextual()) @@ -346,4 +354,29 @@ public static void encode(Encoder e, Schema s, JsonNode n) throws IOException { break; } } + + private static boolean isCompatible(Schema.Type stype, JsonNode value) { + switch (stype) { + case RECORD: + case ENUM: + case ARRAY: + case MAP: + case UNION: + return true; + case FIXED: + case STRING: + case BYTES: + return value.isTextual(); + case INT: + case LONG: + case FLOAT: + case DOUBLE: + return value.isNumber(); + case BOOLEAN: + return value.isBoolean(); + case NULL: + return value.isNull(); + } + return true; + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java index a18f3fdbcd5..b5dcbeb68f0 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java @@ -51,15 +51,15 @@ public enum Kind { IMPLICIT_ACTION, /** non-terminal action symbol which is explicitly consumed */ EXPLICIT_ACTION - }; + } /// The kind of this symbol. public final Kind kind; /** * The production for this symbol. If this symbol is a terminal this is - * null. Otherwise this holds the the sequence of the symbols that - * forms the production for this symbol. The sequence is in the reverse order of + * null. Otherwise this holds the sequence of the symbols that forms + * the production for this symbol. The sequence is in the reverse order of * production. This is useful for easy copying onto parsing stack. * * Please note that this is a final. So the production for a symbol should be @@ -94,7 +94,7 @@ static Symbol root(Symbol... symbols) { /** * A convenience method to construct a sequence. - * + * * @param production The constituent symbols of the sequence. */ static Symbol seq(Symbol... production) { @@ -103,7 +103,7 @@ static Symbol seq(Symbol... production) { /** * A convenience method to construct a repeater. - * + * * @param symsToRepeat The symbols to repeat in the repeater. */ static Symbol repeat(Symbol endSymbol, Symbol... symsToRepeat) { @@ -119,7 +119,7 @@ static Symbol alt(Symbol[] symbols, String[] labels) { /** * A convenience method to construct an ErrorAction. - * + * * @param e */ static Symbol error(String e) { @@ -128,7 +128,7 @@ static Symbol error(String e) { /** * A convenience method to construct a ResolvingAction. - * + * * @param w The writer symbol * @param r The reader symbol */ @@ -201,7 +201,7 @@ public int flattenedSize() { * @param skip The position where the output input sub-array starts. * @param map A map of symbols which have already been expanded. Useful for * handling recursive definitions and for caching. - * @param map2 A map to to store the list of fix-ups. + * @param map2 A map to store the list of fix-ups. */ static void flatten(Symbol[] in, int start, Symbol[] out, int skip, Map map, Map> map2) { @@ -238,7 +238,7 @@ private static void copyFixups(List fixups, Symbol[] out, int outPos, Sym /** * Returns the amount of space required to flatten the given sub-array of * symbols. - * + * * @param symbols The array of input symbols. * @param start The index where the subarray starts. * @return The number of symbols that will be produced if one expands the given @@ -317,7 +317,7 @@ public int size() { @Override public Iterator iterator() { - return new Iterator() { + return new Iterator<>() { private int pos = production.length; @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java index 7798f520ae6..2f2e9cdc1c0 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java @@ -41,7 +41,7 @@ public Symbol generate(Schema schema) { * given schema sc. If there is already an entry for the given schema * in the given map seen then that entry is returned. Otherwise a new * symbol is generated and an entry is inserted into the map. - * + * * @param sc The schema for which the start symbol is required * @param seen A map of schema to symbol mapping done so far. * @return The start symbol for the schema diff --git a/lang/java/avro/src/main/java/org/apache/avro/message/BinaryMessageDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/message/BinaryMessageDecoder.java index 61d8ef777ff..46d1d04b8bd 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/message/BinaryMessageDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/message/BinaryMessageDecoder.java @@ -21,6 +21,7 @@ import org.apache.avro.Schema; import org.apache.avro.SchemaNormalization; import org.apache.avro.generic.GenericData; +import org.apache.avro.util.internal.ThreadLocalWithInitial; import java.io.IOException; import java.io.InputStream; @@ -47,9 +48,9 @@ */ public class BinaryMessageDecoder extends MessageDecoder.BaseDecoder { - private static final ThreadLocal HEADER_BUFFER = ThreadLocal.withInitial(() -> new byte[10]); + private static final ThreadLocal HEADER_BUFFER = ThreadLocalWithInitial.of(() -> new byte[10]); - private static final ThreadLocal FP_BUFFER = ThreadLocal.withInitial(() -> { + private static final ThreadLocal FP_BUFFER = ThreadLocalWithInitial.of(() -> { byte[] header = HEADER_BUFFER.get(); return ByteBuffer.wrap(header).order(ByteOrder.LITTLE_ENDIAN); }); @@ -121,7 +122,7 @@ public BinaryMessageDecoder(GenericData model, Schema readSchema, SchemaStore re public void addSchema(Schema writeSchema) { long fp = SchemaNormalization.parsingFingerprint64(writeSchema); final Schema actualReadSchema = this.readSchema != null ? this.readSchema : writeSchema; - codecByFingerprint.put(fp, new RawMessageDecoder(model, writeSchema, actualReadSchema)); + codecByFingerprint.put(fp, new RawMessageDecoder<>(model, writeSchema, actualReadSchema)); } private RawMessageDecoder getDecoder(long fp) { diff --git a/lang/java/avro/src/main/java/org/apache/avro/message/MessageDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/message/MessageDecoder.java index 8a1e8526f26..007dfed2f90 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/message/MessageDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/message/MessageDecoder.java @@ -21,6 +21,8 @@ import org.apache.avro.util.ReusableByteArrayInputStream; import org.apache.avro.util.ReusableByteBufferInputStream; +import org.apache.avro.util.internal.ThreadLocalWithInitial; + import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; @@ -111,11 +113,11 @@ public interface MessageDecoder { */ abstract class BaseDecoder implements MessageDecoder { - private static final ThreadLocal BYTE_ARRAY_IN = ThreadLocal - .withInitial(ReusableByteArrayInputStream::new); + private static final ThreadLocal BYTE_ARRAY_IN = ThreadLocalWithInitial + .of(ReusableByteArrayInputStream::new); - private static final ThreadLocal BYTE_BUFFER_IN = ThreadLocal - .withInitial(ReusableByteBufferInputStream::new); + private static final ThreadLocal BYTE_BUFFER_IN = ThreadLocalWithInitial + .of(ReusableByteBufferInputStream::new); @Override public D decode(InputStream stream) throws IOException { diff --git a/lang/java/avro/src/main/java/org/apache/avro/message/RawMessageDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/message/RawMessageDecoder.java index ad2b1d31e49..917e5be88e3 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/message/RawMessageDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/message/RawMessageDecoder.java @@ -78,9 +78,7 @@ public RawMessageDecoder(GenericData model, Schema schema) { * @param writeSchema the {@link Schema} used to decode buffers */ public RawMessageDecoder(GenericData model, Schema writeSchema, Schema readSchema) { - Schema writeSchema1 = writeSchema; - Schema readSchema1 = readSchema; - this.reader = model.createDatumReader(writeSchema1, readSchema1); + this.reader = model.createDatumReader(writeSchema, readSchema); } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/message/RawMessageEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/message/RawMessageEncoder.java index 093783e8614..230c6c1feab 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/message/RawMessageEncoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/message/RawMessageEncoder.java @@ -24,6 +24,8 @@ import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DatumWriter; import org.apache.avro.io.EncoderFactory; +import org.apache.avro.util.internal.ThreadLocalWithInitial; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -37,7 +39,7 @@ */ public class RawMessageEncoder implements MessageEncoder { - private static final ThreadLocal TEMP = ThreadLocal.withInitial(BufferOutputStream::new); + private static final ThreadLocal TEMP = ThreadLocalWithInitial.of(BufferOutputStream::new); private static final ThreadLocal ENCODER = new ThreadLocal<>(); @@ -79,9 +81,8 @@ public RawMessageEncoder(GenericData model, Schema schema) { * @param shouldCopy whether to copy buffers before returning encoded results */ public RawMessageEncoder(GenericData model, Schema schema, boolean shouldCopy) { - Schema writeSchema = schema; this.copyOutputBytes = shouldCopy; - this.writer = model.createDatumWriter(writeSchema); + this.writer = model.createDatumWriter(schema); } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/ArrayPositionPredicate.java b/lang/java/avro/src/main/java/org/apache/avro/path/ArrayPositionPredicate.java new file mode 100644 index 00000000000..480d02614ca --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/ArrayPositionPredicate.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +/** + * Returns items by their position (numeric index) in an array + */ +public class ArrayPositionPredicate implements PositionalPathPredicate { + private final long index; + + public ArrayPositionPredicate(long index) { + this.index = index; + } + + @Override + public String toString() { + return "[" + index + "]"; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/LocationStep.java b/lang/java/avro/src/main/java/org/apache/avro/path/LocationStep.java new file mode 100644 index 00000000000..c8442dfaef9 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/LocationStep.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +/** + * Selects items based on their "path" (name of a property under which they are + * stored) relative to the context. + */ +public class LocationStep implements PathElement { + /** + * selector part of location step. either "." or ".." + */ + private final String selector; + /** + * name of a property to select + */ + private final String propertyName; + + public LocationStep(String selector, String propertyName) { + this.selector = selector; + this.propertyName = propertyName; + } + + @Override + public String toString() { + if (propertyName == null || propertyName.isEmpty()) { + return selector; + } + return selector + propertyName; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/MapKeyPredicate.java b/lang/java/avro/src/main/java/org/apache/avro/path/MapKeyPredicate.java new file mode 100644 index 00000000000..b183d8459d3 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/MapKeyPredicate.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +/** + * Returns items by their position (string key under which they are stored) in a + * map + */ +public class MapKeyPredicate implements PositionalPathPredicate { + private final String key; + + public MapKeyPredicate(String key) { + this.key = key; + } + + public String getKey() { + return key; + } + + @Override + public String toString() { + if (key == null) { + return ""; + } + return "[\"" + key + "\"]"; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/PathElement.java b/lang/java/avro/src/main/java/org/apache/avro/path/PathElement.java new file mode 100644 index 00000000000..f3be4dc2a92 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/PathElement.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +/** + * root interface for all pieces of an AvroPath expression + */ +public interface PathElement { +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/PathPredicate.java b/lang/java/avro/src/main/java/org/apache/avro/path/PathPredicate.java new file mode 100644 index 00000000000..092894652f2 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/PathPredicate.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +/** + * a predicate is a filter that restricts items selected by a + * {@link LocationStep} + */ +public interface PathPredicate extends PathElement { +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/PathTracingException.java b/lang/java/avro/src/main/java/org/apache/avro/path/PathTracingException.java new file mode 100644 index 00000000000..ac9ba513722 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/PathTracingException.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +import org.apache.avro.Schema; + +/** + * interface for exceptions that can trace the AvroPath of an error + * + * @param the regular (user-facing) exception that will be + * {@link #summarize(Schema)}ed out of this class + */ +public interface PathTracingException { + /** + * appends a path element to the trace. expected to be called in reverse-order + * as the exception bubbles up the stack + * + * @param step an AvroPath step tracing back from the location of the original + * exception towards the root of the data graph + */ + void tracePath(PathElement step); + + /** + * produces a user-facing exception to be thrown back out to user code + * + * @param root the root object for the operation that generated the exception + * @return an exception + */ + T summarize(Schema root); +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/PositionalPathPredicate.java b/lang/java/avro/src/main/java/org/apache/avro/path/PositionalPathPredicate.java new file mode 100644 index 00000000000..3c9751ef2a5 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/PositionalPathPredicate.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +/** + * filters items by their context position + */ +public interface PositionalPathPredicate extends PathPredicate { +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/TracingAvroTypeException.java b/lang/java/avro/src/main/java/org/apache/avro/path/TracingAvroTypeException.java new file mode 100644 index 00000000000..f7dae885d5d --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/TracingAvroTypeException.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.avro.AvroTypeException; +import org.apache.avro.Schema; +import org.apache.avro.util.SchemaUtil; + +/** + * an {@link AvroTypeException} with extra fields used to trace back the path to + * a bad value through an object graph + */ +public class TracingAvroTypeException extends AvroTypeException implements PathTracingException { + private final List reversePath; + + public TracingAvroTypeException(AvroTypeException cause) { + super(cause.getMessage(), cause); + this.reversePath = new ArrayList<>(3); // expected to be short + } + + @Override + public void tracePath(PathElement step) { + reversePath.add(step); + } + + @Override + public AvroTypeException summarize(Schema root) { + AvroTypeException cause = (AvroTypeException) getCause(); + + StringBuilder sb = new StringBuilder(); + sb.append(cause.getMessage()); + + if (reversePath != null && !reversePath.isEmpty()) { + sb.append(" at "); + if (root != null) { + sb.append(SchemaUtil.describe(root)); + } + for (int i = reversePath.size() - 1; i >= 0; i--) { + PathElement step = reversePath.get(i); + sb.append(step.toString()); + } + } + return new AvroTypeException(sb.toString(), cause); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/TracingClassCastException.java b/lang/java/avro/src/main/java/org/apache/avro/path/TracingClassCastException.java new file mode 100644 index 00000000000..87607aee8f6 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/TracingClassCastException.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.path; + +import org.apache.avro.Schema; +import org.apache.avro.util.SchemaUtil; + +import java.util.ArrayList; +import java.util.List; + +/** + * a {@link ClassCastException} with extra fields used to trace back the path to + * a bad value through an object graph + */ +public class TracingClassCastException extends ClassCastException implements PathTracingException { + private final ClassCastException cause; + private final Object datum; + private final Schema expected; + private final boolean customCoderUsed; + private final List reversePath; + + public TracingClassCastException(ClassCastException cause, Object datum, Schema expected, boolean customCoderUsed) { + this.cause = cause; + this.datum = datum; + this.expected = expected; + this.customCoderUsed = customCoderUsed; + this.reversePath = new ArrayList<>(3); // assume short + } + + @Override + public void tracePath(PathElement step) { + reversePath.add(step); + } + + @Override + public synchronized ClassCastException getCause() { + return cause; + } + + /** + * @return a hopefully helpful error message + */ + @Override + public ClassCastException summarize(Schema root) { + StringBuilder sb = new StringBuilder(); + sb.append("value ").append(SchemaUtil.describe(datum)); + sb.append(" cannot be cast to expected type ").append(SchemaUtil.describe(expected)); + if (reversePath == null || reversePath.isEmpty()) { + // very simple "shallow" NPE, no nesting at all, or custom coders used means we + // have no data + if (customCoderUsed) { + sb.append(". No further details available as custom coders were used"); + } + } else { + sb.append(" at "); + if (root != null) { + sb.append(SchemaUtil.describe(root)); + } + for (int i = reversePath.size() - 1; i >= 0; i--) { + PathElement step = reversePath.get(i); + sb.append(step.toString()); + } + } + ClassCastException summary = new ClassCastException(sb.toString()); + summary.initCause(cause); + return summary; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/TracingNullPointException.java b/lang/java/avro/src/main/java/org/apache/avro/path/TracingNullPointException.java new file mode 100644 index 00000000000..fabfc764d51 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/TracingNullPointException.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.path; + +import org.apache.avro.Schema; +import org.apache.avro.util.SchemaUtil; + +import java.util.ArrayList; +import java.util.List; + +/** + * a {@link NullPointerException} with extra fields used to trace back the path + * to a null value through an object graph + */ +public class TracingNullPointException extends NullPointerException + implements PathTracingException { + private final NullPointerException cause; + private final Schema expected; + private final boolean customCoderUsed; + private final List reversePath; + + public TracingNullPointException(NullPointerException cause, Schema expected, boolean customCoderUsed) { + this.cause = cause; + this.expected = expected; + this.customCoderUsed = customCoderUsed; + this.reversePath = new ArrayList<>(3); // assume short + } + + @Override + public void tracePath(PathElement step) { + reversePath.add(step); + } + + @Override + public synchronized NullPointerException getCause() { + return cause; + } + + /** + * @return a hopefully helpful error message + */ + @Override + public NullPointerException summarize(Schema root) { + StringBuilder sb = new StringBuilder(); + sb.append("null value for (non-nullable) "); + if (reversePath == null || reversePath.isEmpty()) { + // very simple "shallow" NPE, no nesting at all, or custom coders used means we + // have no data + if (customCoderUsed) { + sb.append("field or map key. No further details available as custom coders were used"); + } else { + sb.append(SchemaUtil.describe(expected)); + } + } else { + PathElement innerMostElement = reversePath.get(0); + boolean isNullMapKey = innerMostElement instanceof MapKeyPredicate + && ((MapKeyPredicate) innerMostElement).getKey() == null; + if (isNullMapKey) { + sb.delete(0, sb.length()); // clear + sb.append("null key in map"); + } else { + sb.append(SchemaUtil.describe(expected)); + } + sb.append(" at "); + if (root != null) { + sb.append(SchemaUtil.describe(root)); + } + for (int i = reversePath.size() - 1; i >= 0; i--) { + PathElement step = reversePath.get(i); + sb.append(step.toString()); + } + } + NullPointerException summary = new NullPointerException(sb.toString()); + summary.initCause(cause); + return summary; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/UnionTypePredicate.java b/lang/java/avro/src/main/java/org/apache/avro/path/UnionTypePredicate.java new file mode 100644 index 00000000000..01e30e108f1 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/UnionTypePredicate.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +/** + * Returns items by their position (numeric index of type) in a union schema + */ +public class UnionTypePredicate implements PositionalPathPredicate { + private final String type; + + public UnionTypePredicate(String type) { + this.type = type; + } + + @Override + public String toString() { + return "[" + type + "]"; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/package-info.java b/lang/java/avro/src/main/java/org/apache/avro/path/package-info.java new file mode 100644 index 00000000000..8b1dea4b56c --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/package-info.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Interfaces and base classes for AvroPath. This functionality is + * experimental, meaning these APIs are not expected to be stable any + * time soon so use at your own risk. Feedback, however, would be very + * appreciated :-) + */ +package org.apache.avro.path; diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/package.html b/lang/java/avro/src/main/java/org/apache/avro/path/package.html new file mode 100644 index 00000000000..73ab0a71528 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/package.html @@ -0,0 +1,29 @@ + + + + + + Interfaces and base classes for AvroPath. + +

    + This functionality is experimental, meaning these APIs are not + expected to be stable any time soon so use at your own risk. Feedback, + however, would be very appreciated :-) +

    + + diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/AvroEncode.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/AvroEncode.java index 225f247a9ed..b4a021dce79 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/AvroEncode.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/AvroEncode.java @@ -18,6 +18,7 @@ package org.apache.avro.reflect; import java.lang.annotation.ElementType; +import java.lang.annotation.Inherited; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; @@ -30,7 +31,8 @@ * file. Use of {@link org.apache.avro.io.ValidatingEncoder} is recommended. */ @Retention(RetentionPolicy.RUNTIME) -@Target(ElementType.FIELD) +@Inherited +@Target({ ElementType.FIELD, ElementType.TYPE }) public @interface AvroEncode { Class> using(); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccess.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccess.java index 96188495121..dce1aed98a5 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccess.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccess.java @@ -21,6 +21,22 @@ abstract class FieldAccess { + protected static final int INT_DEFAULT_VALUE = 0; + + protected static final float FLOAT_DEFAULT_VALUE = 0.0f; + + protected static final short SHORT_DEFAULT_VALUE = (short) 0; + + protected static final byte BYTE_DEFAULT_VALUE = (byte) 0; + + protected static final boolean BOOLEAN_DEFAULT_VALUE = false; + + protected static final char CHAR_DEFAULT_VALUE = '\u0000'; + + protected static final long LONG_DEFAULT_VALUE = 0L; + + protected static final double DOUBLE_DEFAULT_VALUE = 0.0d; + protected abstract FieldAccessor getAccessor(Field field); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessReflect.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessReflect.java index c790dbfb886..72d0563290b 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessReflect.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessReflect.java @@ -28,7 +28,7 @@ class FieldAccessReflect extends FieldAccess { @Override protected FieldAccessor getAccessor(Field field) { - AvroEncode enc = field.getAnnotation(AvroEncode.class); + AvroEncode enc = ReflectionUtil.getAvroEncode(field); if (enc != null) try { return new ReflectionBasesAccessorCustomEncoded(field, enc.using().getDeclaredConstructor().newInstance()); @@ -40,14 +40,14 @@ protected FieldAccessor getAccessor(Field field) { private static class ReflectionBasedAccessor extends FieldAccessor { protected final Field field; - private boolean isStringable; - private boolean isCustomEncoded; + private final boolean isStringable; + private final boolean isCustomEncoded; public ReflectionBasedAccessor(Field field) { this.field = field; this.field.setAccessible(true); isStringable = field.isAnnotationPresent(Stringable.class); - isCustomEncoded = field.isAnnotationPresent(AvroEncode.class); + isCustomEncoded = ReflectionUtil.getAvroEncode(field) != null; } @Override @@ -62,7 +62,29 @@ public Object get(Object object) throws IllegalAccessException { @Override public void set(Object object, Object value) throws IllegalAccessException, IOException { - field.set(object, value); + if (value == null && field.getType().isPrimitive()) { + Object defaultValue = null; + if (int.class.equals(field.getType())) { + defaultValue = INT_DEFAULT_VALUE; + } else if (float.class.equals(field.getType())) { + defaultValue = FLOAT_DEFAULT_VALUE; + } else if (short.class.equals(field.getType())) { + defaultValue = SHORT_DEFAULT_VALUE; + } else if (byte.class.equals(field.getType())) { + defaultValue = BYTE_DEFAULT_VALUE; + } else if (boolean.class.equals(field.getType())) { + defaultValue = BOOLEAN_DEFAULT_VALUE; + } else if (char.class.equals(field.getType())) { + defaultValue = CHAR_DEFAULT_VALUE; + } else if (long.class.equals(field.getType())) { + defaultValue = LONG_DEFAULT_VALUE; + } else if (double.class.equals(field.getType())) { + defaultValue = DOUBLE_DEFAULT_VALUE; + } + field.set(object, defaultValue); + } else { + field.set(object, value); + } } @Override @@ -83,7 +105,7 @@ protected boolean isCustomEncoded() { private static final class ReflectionBasesAccessorCustomEncoded extends ReflectionBasedAccessor { - private CustomEncoding encoding; + private final CustomEncoding encoding; public ReflectionBasesAccessorCustomEncoded(Field f, CustomEncoding encoding) { super(f); diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessUnsafe.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessUnsafe.java deleted file mode 100644 index f555df49ae2..00000000000 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessUnsafe.java +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.avro.reflect; - -import java.io.IOException; -import java.lang.reflect.Field; - -import org.apache.avro.AvroRuntimeException; -import org.apache.avro.io.Decoder; -import org.apache.avro.io.Encoder; - -import sun.misc.Unsafe; - -@SuppressWarnings("restriction") -class FieldAccessUnsafe extends FieldAccess { - - private static final Unsafe UNSAFE; - - static { - try { - Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); - theUnsafe.setAccessible(true); - UNSAFE = (Unsafe) theUnsafe.get(null); - // It seems not all Unsafe implementations implement the following method. - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - protected FieldAccessor getAccessor(Field field) { - AvroEncode enc = field.getAnnotation(AvroEncode.class); - if (enc != null) - try { - return new UnsafeCustomEncodedField(field, enc.using().getDeclaredConstructor().newInstance()); - } catch (Exception e) { - throw new AvroRuntimeException("Could not instantiate custom Encoding"); - } - Class c = field.getType(); - if (c == int.class) - return new UnsafeIntField(field); - else if (c == long.class) - return new UnsafeLongField(field); - else if (c == byte.class) - return new UnsafeByteField(field); - else if (c == float.class) - return new UnsafeFloatField(field); - else if (c == double.class) - return new UnsafeDoubleField(field); - else if (c == char.class) - return new UnsafeCharField(field); - else if (c == boolean.class) - return new UnsafeBooleanField(field); - else if (c == short.class) - return new UnsafeShortField(field); - else - return new UnsafeObjectField(field); - } - - abstract static class UnsafeCachedField extends FieldAccessor { - protected final long offset; - protected Field field; - protected final boolean isStringable; - - UnsafeCachedField(Field f) { - this.offset = UNSAFE.objectFieldOffset(f); - this.field = f; - this.isStringable = f.isAnnotationPresent(Stringable.class); - } - - @Override - protected Field getField() { - return field; - } - - @Override - protected boolean supportsIO() { - return true; - } - - @Override - protected boolean isStringable() { - return isStringable; - } - } - - final static class UnsafeIntField extends UnsafeCachedField { - UnsafeIntField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putInt(object, offset, (Integer) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getInt(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putInt(object, offset, in.readInt()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeInt(UNSAFE.getInt(object, offset)); - } - } - - final static class UnsafeFloatField extends UnsafeCachedField { - protected UnsafeFloatField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putFloat(object, offset, (Float) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getFloat(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putFloat(object, offset, in.readFloat()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeFloat(UNSAFE.getFloat(object, offset)); - } - } - - final static class UnsafeShortField extends UnsafeCachedField { - protected UnsafeShortField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putShort(object, offset, (Short) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getShort(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putShort(object, offset, (short) in.readInt()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeInt(UNSAFE.getShort(object, offset)); - } - } - - final static class UnsafeByteField extends UnsafeCachedField { - protected UnsafeByteField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putByte(object, offset, (Byte) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getByte(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putByte(object, offset, (byte) in.readInt()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeInt(UNSAFE.getByte(object, offset)); - } - } - - final static class UnsafeBooleanField extends UnsafeCachedField { - protected UnsafeBooleanField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putBoolean(object, offset, (Boolean) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getBoolean(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putBoolean(object, offset, in.readBoolean()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeBoolean(UNSAFE.getBoolean(object, offset)); - } - } - - final static class UnsafeCharField extends UnsafeCachedField { - protected UnsafeCharField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putChar(object, offset, (Character) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getChar(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putChar(object, offset, (char) in.readInt()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeInt(UNSAFE.getChar(object, offset)); - } - } - - final static class UnsafeLongField extends UnsafeCachedField { - protected UnsafeLongField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putLong(object, offset, (Long) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getLong(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putLong(object, offset, in.readLong()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeLong(UNSAFE.getLong(object, offset)); - } - } - - final static class UnsafeDoubleField extends UnsafeCachedField { - protected UnsafeDoubleField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putDouble(object, offset, (Double) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getDouble(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putDouble(object, offset, in.readDouble()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeDouble(UNSAFE.getDouble(object, offset)); - } - } - - final static class UnsafeObjectField extends UnsafeCachedField { - protected UnsafeObjectField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putObject(object, offset, value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getObject(object, offset); - } - - @Override - protected boolean supportsIO() { - return false; - } - - } - - final static class UnsafeCustomEncodedField extends UnsafeCachedField { - - private CustomEncoding encoding; - - UnsafeCustomEncodedField(Field f, CustomEncoding encoding) { - super(f); - this.encoding = encoding; - } - - @Override - protected Object get(Object object) throws IllegalAccessException { - return UNSAFE.getObject(object, offset); - } - - @Override - protected void set(Object object, Object value) throws IllegalAccessException, IOException { - UNSAFE.putObject(object, offset, value); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putObject(object, offset, encoding.read(in)); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - encoding.write(UNSAFE.getObject(object, offset), out); - } - - @Override - protected boolean isCustomEncoded() { - return true; - } - } -} diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/MapEntry.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/MapEntry.java index fcae137ae07..21c52bc8792 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/MapEntry.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/MapEntry.java @@ -30,7 +30,9 @@ * * @param Key of the map-entry * @param Value of the map-entry + * @deprecated Use org.apache.avro.util.MapEntry */ +@Deprecated public class MapEntry implements Map.Entry { K key; diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java index 4ead6b888f4..61e070b0525 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java @@ -26,6 +26,7 @@ import org.apache.avro.Protocol.Message; import org.apache.avro.Schema; import org.apache.avro.SchemaNormalization; +import org.apache.avro.SchemaParser; import org.apache.avro.generic.GenericContainer; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericFixed; @@ -63,12 +64,19 @@ import java.util.Map; import java.util.WeakHashMap; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; /** Utilities to use existing Java classes and interfaces via reflection. */ public class ReflectData extends SpecificData { private static final String STRING_OUTER_PARENT_REFERENCE = "this$0"; + // holds a wrapper so null entries will have a cached value + private final ConcurrentMap encoderCache = new ConcurrentHashMap<>(); + + /** + * Always false since custom coders are not available for {@link ReflectData}. + */ @Override public boolean useCustomCoders() { return false; @@ -101,6 +109,10 @@ protected Schema createFieldSchema(Field field, Map names) { private static final ReflectData INSTANCE = new ReflectData(); + static { + addLogicalTypeConversions(INSTANCE); + } + /** For subclasses. Applications normally use {@link ReflectData#get()}. */ public ReflectData() { } @@ -355,8 +367,8 @@ protected ClassAccessorData computeValue(Class c) { static class ClassAccessorData { private final Class clazz; private final Map byName = new HashMap<>(); - // getAccessorsFor is already synchronized, no need to wrap - final Map bySchema = new WeakHashMap<>(); + // getAccessorsFor replaces this map with each modification + volatile Map bySchema = new WeakHashMap<>(); private ClassAccessorData(Class c) { clazz = c; @@ -374,12 +386,14 @@ private ClassAccessorData(Class c) { * Return the field accessors as an array, indexed by the field index of the * given schema. */ - private synchronized FieldAccessor[] getAccessorsFor(Schema schema) { - // if synchronized is removed from this method, adjust bySchema appropriately + private FieldAccessor[] getAccessorsFor(Schema schema) { + // to avoid synchronization, we replace the map for each modification FieldAccessor[] result = bySchema.get(schema); if (result == null) { result = createAccessorsFor(schema); + Map bySchema = new WeakHashMap<>(this.bySchema); bySchema.put(schema, result); + this.bySchema = bySchema; } return result; } @@ -422,16 +436,6 @@ private FieldAccessor getFieldAccessor(Class c, String fieldName) { return null; } - /** @deprecated Replaced by {@link SpecificData#CLASS_PROP} */ - @Deprecated - static final String CLASS_PROP = "java-class"; - /** @deprecated Replaced by {@link SpecificData#KEY_CLASS_PROP} */ - @Deprecated - static final String KEY_CLASS_PROP = "java-key-class"; - /** @deprecated Replaced by {@link SpecificData#ELEMENT_PROP} */ - @Deprecated - static final String ELEMENT_PROP = "java-element-class"; - private static final Map CLASS_CACHE = new ConcurrentHashMap<>(); static Class getClassProp(Schema schema, String prop) { @@ -550,8 +554,7 @@ Schema createNonStringMapSchema(Type keyType, Type valueType, Map names) { return result; } else if (type instanceof Class) { // Class Class c = (Class) type; + while (c.isAnonymousClass()) { + c = c.getSuperclass(); + } if (c.isPrimitive() || // primitives c == Void.class || c == Boolean.class || c == Integer.class || c == Long.class || c == Float.class || c == Double.class || c == Byte.class || c == Short.class || c == Character.class) @@ -682,9 +685,17 @@ protected Schema createSchema(Type type, Map names) { setElement(result, component); return result; } + AvroEncode enc = ReflectionUtil.getAvroEncode(c); + if (enc != null) { + try { + return enc.using().getDeclaredConstructor().newInstance().getSchema(); + } catch (Exception e) { + throw new AvroRuntimeException("Could not create schema from custom serializer for " + c.getName()); + } + } AvroSchema explicit = c.getAnnotation(AvroSchema.class); if (explicit != null) // explicit schema - return new Schema.Parser().parse(explicit.value()); + return SchemaParser.parseSingle(explicit.value()); if (CharSequence.class.isAssignableFrom(c)) // String return Schema.create(Schema.Type.STRING); if (ByteBuffer.class.isAssignableFrom(c)) // bytes @@ -729,7 +740,7 @@ protected Schema createSchema(Type type, Map names) { boolean error = Throwable.class.isAssignableFrom(c); schema = Schema.createRecord(name, doc, space, error); consumeAvroAliasAnnotation(c, schema); - names.put(c.getName(), schema); + names.put(fullName, schema); for (Field field : getCachedFields(c)) if ((field.getModifiers() & (Modifier.TRANSIENT | Modifier.STATIC)) == 0 && !field.isAnnotationPresent(AvroIgnore.class)) { @@ -748,7 +759,7 @@ protected Schema createSchema(Type type, Map names) { AvroMeta[] metadata = field.getAnnotationsByType(AvroMeta.class); // add metadata for (AvroMeta meta : metadata) { - if (recordField.getObjectProps().containsKey(meta.key())) { + if (recordField.propsContainsKey(meta.key())) { throw new AvroTypeException("Duplicate field prop key: " + meta.key()); } recordField.addProp(meta.key(), meta.value()); @@ -767,14 +778,18 @@ protected Schema createSchema(Type type, Map names) { schema.setFields(fields); AvroMeta[] metadata = c.getAnnotationsByType(AvroMeta.class); for (AvroMeta meta : metadata) { - if (schema.getObjectProps().containsKey(meta.key())) { + if (schema.propsContainsKey(meta.key())) { throw new AvroTypeException("Duplicate type prop key: " + meta.key()); } schema.addProp(meta.key(), meta.value()); } + // This is added immediately back into the names to ensure that the discoverable + // order is maintained if its a LinkedHashMap. + names.remove(fullName, schema); } names.put(fullName, schema); } + names.put(c.getName(), schema); return schema; } return super.createSchema(type, names); @@ -785,6 +800,18 @@ protected boolean isStringable(Class c) { return c.isAnnotationPresent(Stringable.class) || super.isStringable(c); } + private String simpleName(Class c) { + String simpleName = null; + if (c != null) { + while (c.isAnonymousClass()) { + c = c.getSuperclass(); + } + simpleName = c.getSimpleName(); + } + + return simpleName; + } + private static final Schema THROWABLE_MESSAGE = makeNullable(Schema.create(Schema.Type.STRING)); // if array element type is a class with a union annotation, note it @@ -826,7 +853,7 @@ public static Schema makeNullable(Schema schema) { } } - private static final Map, Field[]> FIELDS_CACHE = new ConcurrentHashMap<>(); + private static final ConcurrentMap, Field[]> FIELDS_CACHE = new ConcurrentHashMap<>(); // Return of this class and its superclasses to serialize. private static Field[] getCachedFields(Class recordClass) { @@ -854,7 +881,7 @@ private static Field[] getFields(Class recordClass, boolean excludeJava) { /** Create a schema for a field. */ protected Schema createFieldSchema(Field field, Map names) { - AvroEncode enc = field.getAnnotation(AvroEncode.class); + AvroEncode enc = ReflectionUtil.getAvroEncode(field); if (enc != null) try { return enc.using().getDeclaredConstructor().newInstance().getSchema(); @@ -864,7 +891,7 @@ protected Schema createFieldSchema(Field field, Map names) { AvroSchema explicit = field.getAnnotation(AvroSchema.class); if (explicit != null) // explicit schema - return new Schema.Parser().parse(explicit.value()); + return SchemaParser.parseSingle(explicit.value()); Union union = field.getAnnotation(Union.class); if (union != null) @@ -888,8 +915,7 @@ protected Schema createFieldSchema(Field field, Map names) { */ @Override public Protocol getProtocol(Class iface) { - Protocol protocol = new Protocol(iface.getSimpleName(), - iface.getPackage() == null ? "" : iface.getPackage().getName()); + Protocol protocol = new Protocol(simpleName(iface), iface.getPackage() == null ? "" : iface.getPackage().getName()); Map names = new LinkedHashMap<>(); Map messages = protocol.getMessages(); Map, Type> genericTypeVariableMap = ReflectionUtil.resolveTypeVariables(iface); @@ -902,11 +928,7 @@ public Protocol getProtocol(Class iface) { } } - // reverse types, since they were defined in reference order - List types = new ArrayList<>(names.values()); - Collections.reverse(types); - protocol.setTypes(types); - + protocol.setTypes(new ArrayList<>(names.values())); return protocol; } @@ -918,7 +940,7 @@ private Message getMessage(Method method, Protocol protocol, Map names); for (Annotation annotation : parameter.getAnnotations()) { if (annotation instanceof AvroSchema) // explicit schema - paramSchema = new Schema.Parser().parse(((AvroSchema) annotation).value()); + paramSchema = SchemaParser.parseSingle(((AvroSchema) annotation).value()); else if (annotation instanceof Union) // union paramSchema = getAnnotatedUnion(((Union) annotation), names); else if (annotation instanceof Nullable) // nullable @@ -938,7 +960,7 @@ else if (annotation instanceof Nullable) // nullable AvroSchema explicit = method.getAnnotation(AvroSchema.class); if (explicit != null) // explicit schema - response = new Schema.Parser().parse(explicit.value()); + response = SchemaParser.parseSingle(explicit.value()); List errs = new ArrayList<>(); errs.add(Protocol.SYSTEM_ERROR); // every method can throw @@ -1037,4 +1059,36 @@ public Object newRecord(Object old, Schema schema) { } return super.newRecord(old, schema); } + + public CustomEncoding getCustomEncoding(Schema schema) { + + return this.encoderCache.computeIfAbsent(schema, this::populateEncoderCache).get(); + } + + private CustomEncodingWrapper populateEncoderCache(Schema schema) { + var enc = ReflectionUtil.getAvroEncode(getClass(schema)); + if (enc != null) { + try { + return new CustomEncodingWrapper(enc.using().getDeclaredConstructor().newInstance()); + } catch (Exception e) { + throw new AvroRuntimeException("Could not instantiate custom Encoding"); + } + } + return new CustomEncodingWrapper(null); + } + + private static class CustomEncodingWrapper { + + private final CustomEncoding customEncoding; + + private CustomEncodingWrapper(CustomEncoding customEncoding) { + this.customEncoding = customEncoding; + } + + public CustomEncoding get() { + return customEncoding; + } + + } + } diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumReader.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumReader.java index 20be49ec408..7ba8e4827c6 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumReader.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumReader.java @@ -21,8 +21,11 @@ import java.lang.reflect.Array; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.HashSet; +import java.util.HashMap; import java.util.Collection; import java.util.Map; +import java.util.Optional; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Conversion; @@ -70,6 +73,10 @@ public ReflectDatumReader(ReflectData data) { super(data); } + private ReflectData getReflectData() { + return (ReflectData) getSpecificData(); + } + @Override protected Object newArray(Object old, int size, Schema schema) { Class collectionClass = ReflectData.getClassProp(schema, SpecificData.CLASS_PROP); @@ -92,8 +99,16 @@ protected Object newArray(Object old, int size, Schema schema) { ((Collection) old).clear(); return old; } + if (collectionClass.isAssignableFrom(ArrayList.class)) return new ArrayList<>(); + + if (collectionClass.isAssignableFrom(HashSet.class)) + return new HashSet<>(); + + if (collectionClass.isAssignableFrom(HashMap.class)) + return new HashMap<>(); + return SpecificData.newInstance(collectionClass, schema); } @@ -135,7 +150,7 @@ protected Object readArray(Object old, Schema expected, ResolvingDecoder in) thr return readCollection(c, expectedType, l, in); } else if (array instanceof Map) { // Only for non-string keys, we can use NS_MAP_* fields - // So we check the samee explicitly here + // So we check the same explicitly here if (ReflectData.isNonStringMapSchema(expected)) { Collection c = new ArrayList<>(); readCollection(c, expectedType, l, in); @@ -240,6 +255,16 @@ protected Object readBytes(Object old, Schema s, Decoder in) throws IOException } } + @Override + protected Object read(Object old, Schema expected, ResolvingDecoder in) throws IOException { + CustomEncoding encoder = getReflectData().getCustomEncoding(expected); + if (encoder != null) { + return encoder.read(old, in); + } else { + return super.read(old, expected, in); + } + } + @Override protected Object readInt(Object old, Schema expected, Decoder in) throws IOException { Object value = in.readInt(); @@ -287,6 +312,15 @@ protected void readField(Object record, Field field, Object oldDatum, ResolvingD return; } } + if (Optional.class.isAssignableFrom(accessor.getField().getType())) { + try { + Object value = readWithoutConversion(oldDatum, field.schema(), in); + accessor.set(record, Optional.ofNullable(value)); + return; + } catch (IllegalAccessException e) { + throw new AvroRuntimeException("Failed to set " + field); + } + } try { accessor.set(record, readWithoutConversion(oldDatum, field.schema(), in)); return; diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumWriter.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumWriter.java index 3dc53be1e31..b9b083fd6b2 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumWriter.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumWriter.java @@ -22,6 +22,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import org.apache.avro.AvroRuntimeException; @@ -29,6 +30,7 @@ import org.apache.avro.Schema.Field; import org.apache.avro.io.Encoder; import org.apache.avro.specific.SpecificDatumWriter; +import org.apache.avro.util.MapEntry; /** * {@link org.apache.avro.io.DatumWriter DatumWriter} for existing classes via @@ -59,6 +61,10 @@ protected ReflectDatumWriter(ReflectData reflectData) { super(reflectData); } + private ReflectData getReflectData() { + return (ReflectData) getSpecificData(); + } + /** * Called to write a array. May be overridden for alternate array * representations. @@ -80,8 +86,7 @@ protected void writeArray(Schema schema, Object datum, Encoder out) throws IOExc out.writeArrayStart(); switch (type) { case BOOLEAN: - if (elementClass.isPrimitive()) - ArrayAccessor.writeArray((boolean[]) datum, out); + ArrayAccessor.writeArray((boolean[]) datum, out); break; case DOUBLE: ArrayAccessor.writeArray((double[]) datum, out); @@ -153,9 +158,17 @@ else if (datum instanceof Map && ReflectData.isNonStringMapSchema(schema)) { entryList.add(new MapEntry(e.getKey(), e.getValue())); } datum = entryList; + } else if (datum instanceof Optional) { + datum = ((Optional) datum).orElse(null); } try { - super.write(schema, datum, out); + + CustomEncoding encoder = getReflectData().getCustomEncoding(schema); + if (encoder != null) { + encoder.write(datum, out); + } else { + super.write(schema, datum, out); + } } catch (NullPointerException e) { // improve error message throw npe(e, " in " + schema.getFullName()); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectionUtil.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectionUtil.java index 18ad4754c7e..3221d91d1f2 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectionUtil.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectionUtil.java @@ -24,6 +24,7 @@ import java.lang.invoke.MethodHandle; import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodType; +import java.lang.reflect.Field; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; import java.lang.reflect.TypeVariable; @@ -56,25 +57,13 @@ static void resetFieldAccess() { // so it is monomorphic and the JIT can inline FieldAccess access = null; try { - if (null == System.getProperty("avro.disable.unsafe")) { - FieldAccess unsafeAccess = load("org.apache.avro.reflect.FieldAccessUnsafe", FieldAccess.class); - if (validate(unsafeAccess)) { - access = unsafeAccess; - } - } - } catch (Throwable ignored) { - } - if (access == null) { - try { - FieldAccess reflectAccess = load("org.apache.avro.reflect.FieldAccessReflect", FieldAccess.class); - if (validate(reflectAccess)) { - access = reflectAccess; - } - } catch (Throwable oops) { - throw new AvroRuntimeException("Unable to load a functional FieldAccess class!"); + FieldAccess reflectAccess = new FieldAccessReflect(); + if (validate(reflectAccess)) { + fieldAccess = reflectAccess; } + } catch (Throwable oops) { + throw new AvroRuntimeException("Unable to load a functional FieldAccess class!"); } - fieldAccess = access; } private static T load(String name, Class type) throws Exception { @@ -118,10 +107,8 @@ private boolean validate(FieldAccess access) throws Exception { } private boolean validField(FieldAccess access, String name, Object original, Object toSet) throws Exception { - FieldAccessor a; - boolean valid = true; - a = accessor(access, name); - valid &= original.equals(a.get(this)); + FieldAccessor a = accessor(access, name); + boolean valid = original.equals(a.get(this)); a.set(this, toSet); valid &= !original.equals(a.get(this)); return valid; @@ -202,4 +189,19 @@ public static Function getConstructorAsFunction(Class parameterC } } + protected static AvroEncode getAvroEncode(Field field) { + var enc = field.getAnnotation(AvroEncode.class); + if (enc != null) { + return enc; + } else { + return getAvroEncode(field.getType()); + } + } + + protected static AvroEncode getAvroEncode(Class clazz) { + if (clazz == null) { + return null; + } + return clazz.getAnnotation(AvroEncode.class); + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/ExternalizableInput.java b/lang/java/avro/src/main/java/org/apache/avro/specific/ExternalizableInput.java index 503db7d16fe..8fa56ae1232 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/ExternalizableInput.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/ExternalizableInput.java @@ -42,11 +42,6 @@ public void close() throws IOException { in.close(); } - @Override - public boolean markSupported() { - return false; - } - @Override public int read() throws IOException { return in.read(); diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java index 104b12edd3b..dbed69d79bb 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java @@ -19,9 +19,12 @@ import org.apache.avro.AvroRuntimeException; import org.apache.avro.AvroTypeException; +import org.apache.avro.Conversions; import org.apache.avro.Protocol; import org.apache.avro.Schema; import org.apache.avro.Schema.Type; +import org.apache.avro.SchemaParser; +import org.apache.avro.data.TimeConversions; import org.apache.avro.generic.GenericData; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.BinaryEncoder; @@ -30,6 +33,8 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.ClassUtils; +import org.apache.avro.util.SchemaUtil; +import org.apache.avro.util.internal.ClassValueCache; import java.io.ObjectInput; import java.io.ObjectOutput; @@ -44,52 +49,74 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.WeakHashMap; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.function.Function; /** Utilities for generated Java classes and interfaces. */ public class SpecificData extends GenericData { private static final SpecificData INSTANCE = new SpecificData(); + static { + addLogicalTypeConversions(INSTANCE); + } + + protected static void addLogicalTypeConversions(SpecificData instance) { + instance.addLogicalTypeConversion(new Conversions.UUIDConversion()); + // Disable DecimalConversion since it's gated behind + // `compiler.setEnableDecimalLogicalType` + // INSTANCE.addLogicalTypeConversion(new Conversions.DecimalConversion()); + instance.addLogicalTypeConversion(new Conversions.BigDecimalConversion()); + instance.addLogicalTypeConversion(new Conversions.DurationConversion()); + instance.addLogicalTypeConversion(new TimeConversions.DateConversion()); + instance.addLogicalTypeConversion(new TimeConversions.LocalTimestampMicrosConversion()); + instance.addLogicalTypeConversion(new TimeConversions.LocalTimestampMillisConversion()); + instance.addLogicalTypeConversion(new TimeConversions.LocalTimestampNanosConversion()); + instance.addLogicalTypeConversion(new TimeConversions.TimeMicrosConversion()); + instance.addLogicalTypeConversion(new TimeConversions.TimeMillisConversion()); + instance.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion()); + instance.addLogicalTypeConversion(new TimeConversions.TimestampMillisConversion()); + instance.addLogicalTypeConversion(new TimeConversions.TimestampNanosConversion()); + } + private static final Class[] NO_ARG = new Class[] {}; private static final Class[] SCHEMA_ARG = new Class[] { Schema.class }; - private static final ClassValue CTOR_CACHE = new ClassValue() { - @Override - protected Constructor computeValue(Class c) { - boolean useSchema = SchemaConstructable.class.isAssignableFrom(c); - try { - Constructor meth = c.getDeclaredConstructor(useSchema ? SCHEMA_ARG : NO_ARG); - meth.setAccessible(true); - return meth; - } catch (Exception e) { - throw new RuntimeException(e); - } + + private static final Function, Constructor> CTOR_CACHE = new ClassValueCache<>(c -> { + boolean useSchema = SchemaConstructable.class.isAssignableFrom(c); + try { + Constructor meth = c.getDeclaredConstructor(useSchema ? SCHEMA_ARG : NO_ARG); + meth.setAccessible(true); + return meth; + } catch (Exception e) { + throw new RuntimeException(e); } + }); - }; - private static final ClassValue MODEL_CACHE = new ClassValue() { - @Override - protected SpecificData computeValue(Class type) { - Field specificDataField; - try { - specificDataField = type.getDeclaredField("MODEL$"); - specificDataField.setAccessible(true); - return (SpecificData) specificDataField.get(null); - } catch (NoSuchFieldException e) { - // Return default instance - return SpecificData.get(); - } catch (IllegalAccessException e) { - throw new AvroRuntimeException("while trying to access field MODEL$ on " + type.getCanonicalName(), e); - } + private static final Function, SpecificData> MODEL_CACHE = new ClassValueCache<>(c -> { + Field specificDataField; + try { + specificDataField = c.getDeclaredField("MODEL$"); + specificDataField.setAccessible(true); + return (SpecificData) specificDataField.get(null); + } catch (NoSuchFieldException e) { + // Return default instance + return SpecificData.get(); + } catch (IllegalAccessException e) { + throw new AvroRuntimeException("while trying to access field MODEL$ on " + c.getCanonicalName(), e); } - }; + }); public static final String CLASS_PROP = "java-class"; public static final String KEY_CLASS_PROP = "java-key-class"; public static final String ELEMENT_PROP = "java-element-class"; + public static final char RESERVED_WORD_ESCAPE_CHAR = '$'; + /** * Reserved words from * https://docs.oracle.com/javase/specs/jls/se16/html/jls-3.html require @@ -104,12 +131,36 @@ protected SpecificData computeValue(Class type) { "throw", "throws", "transient", "try", "void", "volatile", "while", // Literals from Section 3.10 can't be used as identifiers. "true", "false", "null", - // Some keywords from Section 3.8 can't be used as type identifiers. - "var", "yield", "record", // Note that module-related restricted keywords can still be used. // Class names used internally by the avro code generator "Builder")); + /* Reserved words for accessor/mutator methods */ + public static final Set ACCESSOR_MUTATOR_RESERVED_WORDS = new HashSet<>( + Arrays.asList("class", "schema", "classSchema")); + + static { + // Add reserved words to accessor/mutator reserved words + ACCESSOR_MUTATOR_RESERVED_WORDS.addAll(RESERVED_WORDS); + } + + /* Reserved words for type identifiers */ + public static final Set TYPE_IDENTIFIER_RESERVED_WORDS = new HashSet<>( + Arrays.asList("var", "yield", "record")); + + static { + // Add reserved words to type identifier reserved words + TYPE_IDENTIFIER_RESERVED_WORDS.addAll(RESERVED_WORDS); + } + + /* Reserved words for error types */ + public static final Set ERROR_RESERVED_WORDS = new HashSet<>(Arrays.asList("message", "cause")); + + static { + // Add accessor/mutator reserved words to error reserved words + ERROR_RESERVED_WORDS.addAll(ACCESSOR_MUTATOR_RESERVED_WORDS); + } + /** * Read/write some common builtin classes as strings. Representing these as * strings isn't always best, as they aren't always ordered ideally, but at @@ -151,16 +202,16 @@ public static SpecificData get() { } /** - * For RECORD type schemas, this method returns the SpecificData instance of the - * class associated with the schema, in order to get the right conversions for - * any logical types used. + * For RECORD and UNION type schemas, this method returns the SpecificData + * instance of the class associated with the schema, in order to get the right + * conversions for any logical types used. * * @param reader the reader schema * @return the SpecificData associated with the schema's class, or the default * instance. */ public static SpecificData getForSchema(Schema reader) { - if (reader != null && reader.getType() == Type.RECORD) { + if (reader != null && (reader.getType() == Type.RECORD || reader.getType() == Type.UNION)) { final Class clazz = SpecificData.get().getClass(reader); if (clazz != null) { return getForClass(clazz); @@ -183,7 +234,7 @@ public static SpecificData getForSchema(Schema reader) { */ public static SpecificData getForClass(Class c) { if (SpecificRecordBase.class.isAssignableFrom(c)) { - return MODEL_CACHE.get(c); + return MODEL_CACHE.apply(c); } return SpecificData.get(); } @@ -193,7 +244,7 @@ public static SpecificData getForClass(Class c) { /** * Retrieve the current value of the custom-coders feature flag. Defaults to - * true, but this default can be overriden using the system + * false, but this default can be overridden using the system * property org.apache.avro.specific.use_custom_coders, and can be * set dynamically by {@link SpecificData#useCustomCoders()}. See classCache = new ConcurrentHashMap<>(); + private final ConcurrentMap classCache = new ConcurrentHashMap<>(); private static final Class NO_CLASS = new Object() { }.getClass(); private static final Schema NULL_SCHEMA = Schema.create(Schema.Type.NULL); + /** + * Utility to mangle the fully qualified class name into a valid symbol. + */ + public static String mangleFullyQualified(String fullName) { + int lastDot = fullName.lastIndexOf('.'); + + if (lastDot < 0) { + return mangleTypeIdentifier(fullName); + } else { + String namespace = fullName.substring(0, lastDot); + String typeName = fullName.substring(lastDot + 1); + + return mangle(namespace) + "." + mangleTypeIdentifier(typeName); + } + } + + /** + * Utility for template use. Adds a dollar sign to reserved words. + */ + public static String mangle(String word) { + return mangle(word, false); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words. + */ + public static String mangle(String word, boolean isError) { + return mangle(word, isError ? ERROR_RESERVED_WORDS : RESERVED_WORDS); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words in type + * identifiers. + */ + public static String mangleTypeIdentifier(String word) { + return mangleTypeIdentifier(word, false); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words in type + * identifiers. + */ + public static String mangleTypeIdentifier(String word, boolean isError) { + return mangle(word, isError ? ERROR_RESERVED_WORDS : TYPE_IDENTIFIER_RESERVED_WORDS); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words. + */ + public static String mangle(String word, Set reservedWords) { + return mangle(word, reservedWords, false); + } + + public static String mangleMethod(String word, boolean isError) { + return mangle(word, isError ? ERROR_RESERVED_WORDS : ACCESSOR_MUTATOR_RESERVED_WORDS, true); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words. + */ + public static String mangle(String word, Set reservedWords, boolean isMethod) { + if (word == null || word.isBlank()) { + return word; + } + if (word.contains(".")) { + // If the 'word' is really a full path of a class we must mangle just the + String[] packageWords = word.split("\\."); + String[] newPackageWords = new String[packageWords.length]; + + for (int i = 0; i < packageWords.length; i++) { + String oldName = packageWords[i]; + newPackageWords[i] = mangle(oldName, reservedWords, false); + } + + return String.join(".", newPackageWords); + } + if (reservedWords.contains(word) || (isMethod && reservedWords + .contains(Character.toLowerCase(word.charAt(0)) + ((word.length() > 1) ? word.substring(1) : "")))) { + return word + "$"; + } + return word; + } + /** Undoes mangling for reserved words. */ protected static String unmangle(String word) { while (word.endsWith("$")) { @@ -328,20 +462,15 @@ private Class getWrapper(Schema schema) { public static String getClassName(Schema schema) { String namespace = schema.getNamespace(); String name = schema.getName(); - if (namespace == null || "".equals(namespace)) + if (namespace == null || namespace.isEmpty()) return name; String dot = namespace.endsWith("$") ? "" : "."; // back-compatibly handle $ - return namespace + dot + name; + return mangle(namespace) + dot + mangleTypeIdentifier(name); } // cache for schemas created from Class objects. Use ClassValue to avoid // locking classloaders and is GC and thread safe. - private final ClassValue schemaClassCache = new ClassValue() { - @Override - protected Schema computeValue(Class type) { - return createSchema(type, new HashMap<>()); - } - }; + private final ClassValueCache schemaClassCache = new ClassValueCache<>(c -> createSchema(c, new HashMap<>())); // for non-class objects, use a WeakHashMap, but this needs a sync block around // it private final Map schemaTypeCache = Collections.synchronizedMap(new WeakHashMap<>()); @@ -350,7 +479,7 @@ protected Schema computeValue(Class type) { public Schema getSchema(java.lang.reflect.Type type) { try { if (type instanceof Class) { - return schemaClassCache.get((Class) type); + return schemaClassCache.apply((Class) type); } return schemaTypeCache.computeIfAbsent(type, t -> createSchema(t, new HashMap<>())); } catch (Exception e) { @@ -388,9 +517,11 @@ else if (type instanceof ParameterizedType) { } else if (Map.class.isAssignableFrom(raw)) { // map java.lang.reflect.Type key = params[0]; java.lang.reflect.Type value = params[1]; - if (!(key instanceof Class && CharSequence.class.isAssignableFrom((Class) key))) - throw new AvroTypeException("Map key class not CharSequence: " + key); + if (!(key instanceof Class && CharSequence.class.isAssignableFrom((Class) key))) + throw new AvroTypeException("Map key class not CharSequence: " + SchemaUtil.describe(key)); return Schema.createMap(createSchema(value, names)); + } else if (Optional.class.isAssignableFrom(raw)) { + return Schema.createUnion(Schema.create(Schema.Type.NULL), createSchema(params[0], names)); } else { return createSchema(raw, names); } @@ -404,8 +535,8 @@ else if (type instanceof ParameterizedType) { if (!fullName.equals(getClassName(schema))) // HACK: schema mismatches class. maven shade plugin? try replacing. - schema = new Schema.Parser() - .parse(schema.toString().replace(schema.getNamespace(), c.getPackage().getName())); + schema = SchemaParser + .parseSingle(schema.toString().replace(schema.getNamespace(), c.getPackage().getName())); } catch (NoSuchFieldException e) { throw new AvroRuntimeException("Not a Specific class: " + c); } catch (IllegalAccessException e) { @@ -474,7 +605,7 @@ public static Object newInstance(Class c, Schema s) { boolean useSchema = SchemaConstructable.class.isAssignableFrom(c); Object result; try { - Constructor meth = CTOR_CACHE.get(c); + Constructor meth = CTOR_CACHE.apply(c); result = meth.newInstance(useSchema ? new Object[] { s } : null); } catch (Exception e) { throw new RuntimeException(e); @@ -512,8 +643,8 @@ public InstanceSupplier getNewRecordSupplier(Schema schema) { } boolean useSchema = SchemaConstructable.class.isAssignableFrom(c); - Constructor meth = (Constructor) CTOR_CACHE.get(c); - Object[] params = useSchema ? new Object[] { schema } : (Object[]) null; + Constructor meth = CTOR_CACHE.apply(c); + Object[] params = useSchema ? new Object[] { schema } : null; return (old, sch) -> { try { diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java index d924c8e04b7..ca9da138c38 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java @@ -22,14 +22,36 @@ import org.apache.avro.AvroRuntimeException; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.io.ResolvingDecoder; +import org.apache.avro.util.ClassSecurityValidator.SystemPropertiesPredicate; import org.apache.avro.util.ClassUtils; + import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import org.apache.avro.util.ClassSecurityValidator; /** * {@link org.apache.avro.io.DatumReader DatumReader} for generated Java * classes. */ public class SpecificDatumReader extends GenericDatumReader { + + /** + * @deprecated Use {@link SystemPropertiesPredicate} instead. + * @see ClassSecurityValidator + */ + @Deprecated + public static final String[] SERIALIZABLE_PACKAGES = SystemPropertiesPredicate.SERIALIZABLE_PACKAGES + .toArray(new String[0]); + + /** + * @deprecated Use {@link SystemPropertiesPredicate} instead. + * @see ClassSecurityValidator + */ + @Deprecated + public static final String[] SERIALIZABLE_CLASSES = SystemPropertiesPredicate.SERIALIZABLE_CLASSES + .toArray(new String[0]); + public SpecificDatumReader() { this(null, null, SpecificData.get()); } @@ -101,12 +123,31 @@ private Class getPropAsClass(Schema schema, String prop) { if (name == null) return null; try { - return ClassUtils.forName(getData().getClassLoader(), name); + Class clazz = ClassUtils.forName(getData().getClassLoader(), name); + return clazz; } catch (ClassNotFoundException e) { throw new AvroRuntimeException(e); } } + /** + * @deprecated Use {@link SystemPropertiesPredicate} instead. + * @see ClassSecurityValidator + */ + @Deprecated + public final List getTrustedPackages() { + return Arrays.asList(SERIALIZABLE_PACKAGES); + } + + /** + * @deprecated Use {@link SystemPropertiesPredicate} instead. + * @see ClassSecurityValidator + */ + @Deprecated + public final List getTrustedClasses() { + return Arrays.asList(SERIALIZABLE_CLASSES); + } + @Override protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) throws IOException { SpecificData data = getSpecificData(); diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java index 46118474f1b..17214031a6e 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java @@ -23,8 +23,12 @@ import org.apache.avro.Conversion; import org.apache.avro.LogicalType; import org.apache.avro.Schema; +import org.apache.avro.path.TracingAvroTypeException; import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.path.TracingClassCastException; +import org.apache.avro.path.TracingNullPointException; import org.apache.avro.io.Encoder; +import org.apache.avro.path.LocationStep; /** * {@link org.apache.avro.io.DatumWriter DatumWriter} for generated Java @@ -77,7 +81,11 @@ protected void writeRecord(Schema schema, Object datum, Encoder out) throws IOEx if (datum instanceof SpecificRecordBase && this.getSpecificData().useCustomCoders()) { SpecificRecordBase d = (SpecificRecordBase) datum; if (d.hasCustomCoders()) { - d.customEncode(out); + try { + d.customEncode(out); + } catch (NullPointerException e) { + throw new TracingNullPointException(e, null, true); + } return; } } @@ -98,10 +106,9 @@ protected void writeField(Object datum, Schema.Field f, Encoder out, Object stat try { writeWithoutConversion(fieldSchema, value, out); - } catch (NullPointerException e) { - throw npe(e, " in field '" + f.name() + "'"); - } catch (ClassCastException cce) { - throw addClassCastMsg(cce, " in field '" + f.name() + "'"); + } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) { + e.tracePath(new LocationStep(".", f.name())); + throw e; } catch (AvroTypeException ate) { throw addAvroTypeMsg(ate, " in field '" + f.name() + "'"); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificExceptionBase.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificExceptionBase.java index 82c23f129b4..64667ba2420 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificExceptionBase.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificExceptionBase.java @@ -62,7 +62,7 @@ public boolean equals(Object that) { return false; // not a record if (this.getClass() != that.getClass()) return false; // not same schema - return SpecificData.get().compare(this, that, this.getSchema()) == 0; + return this.getSpecificData().compare(this, that, this.getSchema()) == 0; } @Override @@ -76,4 +76,9 @@ public int hashCode() { @Override public abstract void readExternal(ObjectInput in) throws IOException; + public SpecificData getSpecificData() { + // Default implementation for backwards compatibility, overridden in generated + // code + return SpecificData.get(); + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java index 07df303b329..8cf7d5bfe13 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java @@ -35,15 +35,6 @@ public abstract class SpecificRecordBase implements SpecificRecord, Comparable, GenericRecord, Externalizable { - @Override - public abstract Schema getSchema(); - - @Override - public abstract Object get(int field); - - @Override - public abstract void put(int field, Object value); - public SpecificData getSpecificData() { // Default implementation for backwards compatibility, overridden in generated // code @@ -105,12 +96,12 @@ public String toString() { @Override public void writeExternal(ObjectOutput out) throws IOException { - new SpecificDatumWriter(getSchema()).write(this, SpecificData.getEncoder(out)); + new SpecificDatumWriter<>(getSchema()).write(this, SpecificData.getEncoder(out)); } @Override public void readExternal(ObjectInput in) throws IOException { - new SpecificDatumReader(getSchema()).read(this, SpecificData.getDecoder(in)); + new SpecificDatumReader<>(getSchema()).read(this, SpecificData.getDecoder(in)); } /** diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/ByteBufferInputStream.java b/lang/java/avro/src/main/java/org/apache/avro/util/ByteBufferInputStream.java index f0ae5cc8a5e..375abc23fbf 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/ByteBufferInputStream.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/ByteBufferInputStream.java @@ -25,7 +25,7 @@ /** Utility to present {@link ByteBuffer} data as an {@link InputStream}. */ public class ByteBufferInputStream extends InputStream { - private List buffers; + private final List buffers; private int current; public ByteBufferInputStream(List buffers) { @@ -65,6 +65,18 @@ public int read(byte[] b, int off, int len) throws IOException { } } + @Override + public int available() throws IOException { + long remaining = 0; + for (int i = current; i < buffers.size(); i++) { + remaining += buffers.get(i).remaining(); + if (remaining >= Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } + } + return (int) remaining; + } + /** * Read a buffer from the input without copying, if possible. */ @@ -90,7 +102,7 @@ public ByteBuffer readBuffer(int length) throws IOException { /** * Returns the next non-empty buffer. */ - private ByteBuffer getBuffer() throws IOException { + private ByteBuffer getBuffer() { while (current < buffers.size()) { ByteBuffer buffer = buffers.get(current); if (buffer.hasRemaining()) diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/ClassSecurityValidator.java b/lang/java/avro/src/main/java/org/apache/avro/util/ClassSecurityValidator.java new file mode 100644 index 00000000000..b50d0f0250a --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/ClassSecurityValidator.java @@ -0,0 +1,254 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.NavigableSet; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Validates that a class is trusted to be included in Avro schemas. To be used + * by {@link ClassUtils} which therefore automatically guards not only the + * loading of the classes but, since the class names are translated into + * {@link Class} objects by using {@link ClassUtils}, also guards any other + * reflection-based mechanisms (e.g. instantiation, setting internal variables). + * + * @see #setGlobal(ClassSecurityPredicate) + * @see #getGlobal() + */ +public final class ClassSecurityValidator { + + /** + * Validates that the class is trusted to be included in Avro schemas. + * + *

    + * Note: this method shall be invoked with un-initialized classes only to + * prevent any potential security issues the initialization may trigger. + * + * @param clazz the class to validate + * @throws SecurityException if the class is not trusted + */ + public static void validate(Class clazz) { + while (clazz.isArray()) { + clazz = clazz.getComponentType(); + } + if (clazz.isPrimitive()) { + return; + } + if (!globalInstance.isTrusted(clazz)) { + globalInstance.forbiddenClass(clazz.getName()); + } + } + + /** + * Sets the global {@link ClassSecurityPredicate} that is used by + * {@link ClassUtils} to validate the trusted classes. + * + * @param validator the validator to use + */ + public static void setGlobal(ClassSecurityPredicate validator) { + globalInstance = Objects.requireNonNull(validator); + } + + /** + * Returns the global {@link ClassSecurityPredicate} that is used by + * {@link ClassUtils} to validate the trusted classes. + * + * @return the global validator + */ + public static ClassSecurityPredicate getGlobal() { + return globalInstance; + } + + private ClassSecurityValidator() { + } + + /** + * A predicate that checks if a class is trusted to be included in Avro schemas. + */ + public interface ClassSecurityPredicate { + /** + * Checks if the class is trusted to be included in Avro schemas. + * + * @param clazz the class to check + * @return true if the class is trusted, false otherwise + */ + boolean isTrusted(Class clazz); + + /** + * Throws a {@link SecurityException} with a message indicating that the class + * is not trusted to be included in Avro schemas. + * + * @param className the name of the class that is not trusted + */ + default void forbiddenClass(String className) { + throw new SecurityException("Forbidden " + className + "! This class is not trusted to be included in Avro " + + "schemas. You may either use the system properties org.apache.avro.SERIALIZABLE_CLASSES and " + + "org.apache.avro.SERIALIZABLE_PACKAGES to set the comma separated list of the classes or packages you trust, " + + "or you can set them via the API (see org.apache.avro.util.ClassSecurityValidator)."); + } + } + + /** + * A couple of trusted classes that are safe to be loaded, instantiated with any + * constructors or alter any internals via reflection. + */ + public static final ClassSecurityPredicate DEFAULT_TRUSTED_CLASSES = builder().add("java.lang.Boolean") + .add("java.lang.Byte").add("java.lang.Character").add("java.lang.CharSequence").add("java.lang.Double") + .add("java.lang.Enum").add("java.lang.Float").add("java.lang.Integer").add("java.lang.Long") + .add("java.lang.Number").add("java.lang.Object").add("java.lang.Short").add("java.lang.String") + .add("java.lang.Void").add("java.math.BigDecimal").add("java.math.BigInteger").build(); + + /** + * The predicate that uses the system properties + * {@value SystemPropertiesPredicate#SYSPROP_SERIALIZABLE_CLASSES} and + * {@value SystemPropertiesPredicate#SYSPROP_SERIALIZABLE_PACKAGES}. + */ + public static final ClassSecurityPredicate SYSTEM_PROPERTIES = new SystemPropertiesPredicate(); + + /** + * The default predicate that uses both the system properties and the hard-coded + * trusted classes. + * + * @see #DEFAULT_TRUSTED_CLASSES + * @see #SYSTEM_PROPERTIES + */ + public static final ClassSecurityPredicate DEFAULT = composite(DEFAULT_TRUSTED_CLASSES, SYSTEM_PROPERTIES); + + private static ClassSecurityPredicate globalInstance = DEFAULT; + + /** + * Creates a builder for a {@link ClassSecurityValidator} that validates the + * trusted classes by whitelisting their names. Note: no parent validator is + * used. + * + * @return a new {@link ClassSecurityValidator} builder + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Creates a composite {@link ClassSecurityValidator} that delegates to the + * given validators. + * + * @param validators the validators to delegate to + * @return a new {@link ClassSecurityValidator} that delegates to the given + * validators + */ + public static ClassSecurityPredicate composite(ClassSecurityPredicate... validators) { + return clazz -> Arrays.stream(validators).anyMatch(v -> v.isTrusted(clazz)); + } + + public static class Builder { + private final Set allowedClassNames = new HashSet<>(); + + private Builder() { + } + + public Builder add(String className) { + allowedClassNames.add(className); + return this; + } + + public Builder add(Class clazz) { + return add(clazz.getName()); + } + + public ClassSecurityPredicate build() { + return clazz -> allowedClassNames.contains(clazz.getName()); + } + } + + public static class SystemPropertiesPredicate implements ClassSecurityPredicate { + + /** + * The set of trusted classes specified by the system property + * {@value #SYSPROP_SERIALIZABLE_CLASSES}. Empty by default. + */ + public static final Set SERIALIZABLE_CLASSES; + + /** + * The set of trusted packages specified by the system property + * {@value #SYSPROP_SERIALIZABLE_PACKAGES}. Empty by default. + */ + public static final NavigableSet SERIALIZABLE_PACKAGES; + + private static final boolean TRUST_ALL_PACKAGES; + + private static final String SYSPROP_SERIALIZABLE_CLASSES = "org.apache.avro.SERIALIZABLE_CLASSES"; + + private static final String SYSPROP_SERIALIZABLE_PACKAGES = "org.apache.avro.SERIALIZABLE_PACKAGES"; + + static { + // add the hard-coded trusted classes as well + SERIALIZABLE_CLASSES = Collections.unmodifiableSet( + streamPropertyEntries(System.getProperty(SYSPROP_SERIALIZABLE_CLASSES)).collect(Collectors.toSet())); + + // no default serializable packages are hard-coded + NavigableSet packages = streamPropertyEntries(System.getProperty(SYSPROP_SERIALIZABLE_PACKAGES)) + // Add a '.' suffix to ensure we'll be matching package names instead of + // arbitrary prefixes, except for the wildcard "*", which allows all + // packages (this is only safe in fully controlled environments!). + .map(entry -> "*".equals(entry) ? entry : entry + ".").collect(TreeSet::new, TreeSet::add, TreeSet::addAll); + TRUST_ALL_PACKAGES = packages.remove("*"); + + SERIALIZABLE_PACKAGES = Collections.unmodifiableNavigableSet(packages); + } + + /** + * Parse a comma separated list into non-empty entries. Leading and trailing + * whitespace is stripped. + * + * @param commaSeparatedEntries the comma separated list of entries + * @return a stream of the entries + */ + private static Stream streamPropertyEntries(String commaSeparatedEntries) { + if (commaSeparatedEntries == null) { + return Stream.empty(); + } + return Stream.of(commaSeparatedEntries.split(",")).map(s -> s.replaceAll("^\\s+|\\s+$", "")) + .filter(s -> !s.isEmpty()); + } + + private SystemPropertiesPredicate() { + } + + @Override + public boolean isTrusted(Class clazz) { + if (TRUST_ALL_PACKAGES) { + return true; + } + + String className = clazz.getName(); + + if (SERIALIZABLE_CLASSES.contains(className)) { + return true; + } + + String lower = SERIALIZABLE_PACKAGES.lower(className); + return lower != null && className.startsWith(lower); + } + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/ClassUtils.java b/lang/java/avro/src/main/java/org/apache/avro/util/ClassUtils.java index dad59a551d6..c21f276d6d0 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/ClassUtils.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/ClassUtils.java @@ -52,7 +52,7 @@ public static Class forName(Class contextClass, String className) throws C c = forName(className, Thread.currentThread().getContextClassLoader()); } if (c == null) { - throw new ClassNotFoundException("Failed to load class" + className); + throw new ClassNotFoundException("Failed to load class " + className); } return c; } @@ -75,14 +75,14 @@ public static Class forName(ClassLoader classLoader, String className) throws c = forName(className, Thread.currentThread().getContextClassLoader()); } if (c == null) { - throw new ClassNotFoundException("Failed to load class" + className); + throw new ClassNotFoundException("Failed to load class " + className); } return c; } /** * Loads a {@link Class} from the specified {@link ClassLoader} without throwing - * {@link ClassNotFoundException}. + * {@link ClassNotFoundException}. The class is loaded without initialization. * * @param className * @param classLoader @@ -92,7 +92,11 @@ private static Class forName(String className, ClassLoader classLoader) { Class c = null; if (classLoader != null && className != null) { try { - c = Class.forName(className, true, classLoader); + // Load the class without initializing it so we can distinguish between + // ClassNotFoundException and SecurityException (that may be thrown by the + // validator). + c = Class.forName(className, false, classLoader); + ClassSecurityValidator.validate(c); } catch (ClassNotFoundException e) { // Ignore and return null } diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/MapEntry.java b/lang/java/avro/src/main/java/org/apache/avro/util/MapEntry.java new file mode 100644 index 00000000000..ec47c45a012 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/MapEntry.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import java.util.Map; + +/** + * An implementation of {@link Map.Entry} with well-defined member names. + *

    + * Using this class helps make Avro immune from the naming variations of + * key/value fields among several {@link Map.Entry} implementations. If objects + * of this class are used instead of the regular ones obtained by + * {@link Map#entrySet()}, then we need not worry about the actual field names + * or any changes to them in the future. + *

    + * Example: {@code ConcurrentHashMap.MapEntry} does not name the fields as key/ + * value in Java 1.8 while it used to do so in Java 1.7 + * + * @param Key of the map-entry + * @param Value of the map-entry + */ +public class MapEntry implements Map.Entry { + + K key; + V value; + + public MapEntry(K key, V value) { + this.key = key; + this.value = value; + } + + @Override + public K getKey() { + return key; + } + + @Override + public V getValue() { + return value; + } + + @Override + public V setValue(V value) { + V oldValue = this.value; + this.value = value; + return oldValue; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/MapUtil.java b/lang/java/avro/src/main/java/org/apache/avro/util/MapUtil.java new file mode 100644 index 00000000000..394aa2b3a63 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/MapUtil.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import java.util.concurrent.ConcurrentMap; +import java.util.function.Function; + +public class MapUtil { + + private MapUtil() { + super(); + } + + /** + * A temporary workaround for Java 8 specific performance issue JDK-8161372. + * + * @see JDK-8161372 + * @deprecated As of JDK 1.9 this issue has been resolved. + */ + // TODO: Remove for 1.13.0 or later + @Deprecated + public static V computeIfAbsent(ConcurrentMap map, K key, Function mappingFunction) { + V value = map.get(key); + if (value != null) { + return value; + } + return map.computeIfAbsent(key, mappingFunction::apply); + } + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/NonCopyingByteArrayOutputStream.java b/lang/java/avro/src/main/java/org/apache/avro/util/NonCopyingByteArrayOutputStream.java index b72e92e89be..8b08a89c093 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/NonCopyingByteArrayOutputStream.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/NonCopyingByteArrayOutputStream.java @@ -20,22 +20,45 @@ import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; +import java.util.Objects; + +import org.apache.avro.SystemLimitException; /** * Utility to make data written to an {@link ByteArrayOutputStream} directly - * available as a {@link ByteBuffer}. + * available as a {@link ByteBuffer}.Supports limits to the amount of data which + * may be written. All decompressors MUST create capacity restricted streams to + * prevent maliciously compressed data to trigger memory problems across + * threads. + * */ public class NonCopyingByteArrayOutputStream extends ByteArrayOutputStream { /** - * Creates a new byte array output stream, with a buffer capacity of the - * specified size, in bytes. + * Size limit, -1 for no limits. + */ + private final long limit; + + /** + * Creates a new byte array output stream, with no size limit. * * @param size the initial size * @throws IllegalArgumentException if size is negative */ public NonCopyingByteArrayOutputStream(int size) { + this(size, -1); + } + + /** + * Creates a new byte array output stream, with a buffer capacity of the + * specified size, in bytes, capacity limit as specified in {@code limit}. + * + * @param size buffer capacity + * @param limit size limit or -1 for no limit. + */ + private NonCopyingByteArrayOutputStream(final int size, final long limit) { super(size); + this.limit = limit; } /** @@ -48,4 +71,65 @@ public NonCopyingByteArrayOutputStream(int size) { public ByteBuffer asByteBuffer() { return ByteBuffer.wrap(super.buf, 0, super.count); } + + /** + * Check there is capacity to write data. Throws SystemLimitException if the + * limit is exceeded. + * + * @param bytes bytes to add + */ + private void checkCapacity(int bytes) { + if (limit > 0) { + SystemLimitException.checkMaxDecompressCapacity(limit, size(), bytes); + } + } + + @Override + public synchronized void write(final int b) { + checkCapacity(1); + super.write(b); + } + + @Override + public synchronized void write(final byte[] b, final int off, final int len) { + Objects.requireNonNull(b); + Objects.checkFromIndexSize(off, len, b.length); + checkCapacity(len); + super.write(b, off, len); + } + + @Override + public void writeBytes(final byte[] b) { + Objects.requireNonNull(b); + checkCapacity(b.length); + super.writeBytes(b); + } + + /** + * Creates a new byte array output stream, with a buffer capacity of the + * specified size, in bytes. The amount of data which can be written to any + * output stream is limited by the system property + * {@link SystemLimitException#MAX_DECOMPRESS_LENGTH_PROPERTY} + * + * @param size buffer capacity + * @return the output stream + */ + public static NonCopyingByteArrayOutputStream capacityLimitedOutputStream(final int size) { + final long limit = SystemLimitException.MAX_DECOMPRESS_LENGTH; + return new NonCopyingByteArrayOutputStream((int) Math.min(size, limit), limit); + } + + /** + * Creates a new byte array output stream, with a buffer capacity of the + * specified size, in bytes, capacity limit as specified in {@code limit}. + * + * @param size buffer capacity + * @param limit max size of output buffer + * @return the output stream + */ + public static NonCopyingByteArrayOutputStream capacityLimitedOutputStream(final int size, long limit) { + final int initialSize = limit > 0 ? (int) Math.min(size, limit) : size; + return new NonCopyingByteArrayOutputStream(initialSize, limit); + } + } diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java b/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java index e4623fce2ea..2124379162a 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java @@ -17,27 +17,35 @@ */ package org.apache.avro.util; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; +import org.apache.avro.file.CodecFactory; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericArray; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; + import java.io.File; -import java.nio.Buffer; import java.nio.ByteBuffer; import java.nio.charset.Charset; -import java.util.HashMap; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.time.Duration; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; - -import org.apache.avro.Schema; -import org.apache.avro.file.CodecFactory; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericArray; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; +import java.util.concurrent.ThreadLocalRandom; /** Generates schema data as Java objects with random values. */ public class RandomData implements Iterable { public static final String USE_DEFAULT = "use-default"; + private final GenericData genericData; + + private static final int MILLIS_IN_DAY = (int) Duration.ofDays(1).toMillis(); private final Schema root; private final long seed; @@ -57,6 +65,23 @@ public RandomData(Schema schema, int count, boolean utf8ForString) { } public RandomData(Schema schema, int count, long seed, boolean utf8ForString) { + this(GenericData.get(), schema, count, seed, utf8ForString); + } + + public RandomData(GenericData genericData, Schema schema, int count) { + this(genericData, schema, count, false); + } + + public RandomData(GenericData genericData, Schema schema, int count, long seed) { + this(genericData, schema, count, seed, false); + } + + public RandomData(GenericData genericData, Schema schema, int count, boolean utf8ForString) { + this(genericData, schema, count, System.currentTimeMillis(), utf8ForString); + } + + public RandomData(GenericData genericData, Schema schema, int count, long seed, boolean utf8ForString) { + this.genericData = genericData; this.root = schema; this.seed = seed; this.count = count; @@ -67,7 +92,7 @@ public RandomData(Schema schema, int count, long seed, boolean utf8ForString) { public Iterator iterator() { return new Iterator() { private int n; - private Random random = new Random(seed); + private final Random random = new Random(seed); @Override public boolean hasNext() { @@ -91,26 +116,25 @@ public void remove() { private Object generate(Schema schema, Random random, int d) { switch (schema.getType()) { case RECORD: - GenericRecord record = new GenericData.Record(schema); + Object record = genericData.newRecord(null, schema); for (Schema.Field field : schema.getFields()) { Object value = (field.getObjectProp(USE_DEFAULT) == null) ? generate(field.schema(), random, d + 1) : GenericData.get().getDefaultValue(field); - record.put(field.name(), value); + genericData.setField(record, field.name(), field.pos(), value); } return record; case ENUM: List symbols = schema.getEnumSymbols(); - return new GenericData.EnumSymbol(schema, symbols.get(random.nextInt(symbols.size()))); + return genericData.createEnum(symbols.get(random.nextInt(symbols.size())), schema); case ARRAY: - int length = (random.nextInt(5) + 2) - d; - @SuppressWarnings("rawtypes") - GenericArray array = new GenericData.Array(length <= 0 ? 0 : length, schema); + int length = Math.max(0, (random.nextInt(5) + 2) - d); + GenericArray array = (GenericArray) genericData.newArray(null, length, schema); for (int i = 0; i < length; i++) array.add(generate(schema.getElementType(), random, d + 1)); return array; case MAP: - length = (random.nextInt(5) + 2) - d; - Map map = new HashMap<>(length <= 0 ? 0 : length); + length = Math.max(0, (random.nextInt(5) + 2) - d); + Map map = (Map) genericData.newMap(null, length); for (int i = 0; i < length; i++) { map.put(randomString(random, 40), generate(schema.getValueType(), random, d + 1)); } @@ -121,15 +145,15 @@ private Object generate(Schema schema, Random random, int d) { case FIXED: byte[] bytes = new byte[schema.getFixedSize()]; random.nextBytes(bytes); - return new GenericData.Fixed(schema, bytes); + return genericData.createFixed(null, bytes, schema); case STRING: return randomString(random, 40); case BYTES: return randomBytes(random, 40); case INT: - return random.nextInt(); + return this.randomInt(random, schema.getLogicalType()); case LONG: - return random.nextLong(); + return this.randomLong(random, schema.getLogicalType()); case FLOAT: return random.nextFloat(); case DOUBLE: @@ -143,7 +167,24 @@ private Object generate(Schema schema, Random random, int d) { } } - private static final Charset UTF8 = Charset.forName("UTF-8"); + private static final Charset UTF8 = StandardCharsets.UTF_8; + + private int randomInt(Random random, LogicalType type) { + if (type instanceof LogicalTypes.TimeMillis) { + return random.nextInt(RandomData.MILLIS_IN_DAY - 1); + } + // LogicalTypes.Date LocalDate.MAX.toEpochDay() > Integer.MAX; + return random.nextInt(); + } + + private long randomLong(Random random, LogicalType type) { + if (type instanceof LogicalTypes.TimeMicros) { + return ThreadLocalRandom.current().nextLong(RandomData.MILLIS_IN_DAY * 1000L); + } + // For LogicalTypes.TimestampMillis, every long would be OK, + // Instant.MAX.toEpochMilli() failed and would be > Long.MAX_VALUE. + return random.nextLong(); + } private Object randomString(Random random, int maxLength) { int length = random.nextInt(maxLength); @@ -156,7 +197,7 @@ private Object randomString(Random random, int maxLength) { private static ByteBuffer randomBytes(Random rand, int maxLength) { ByteBuffer bytes = ByteBuffer.allocate(rand.nextInt(maxLength)); - ((Buffer) bytes).limit(bytes.capacity()); + bytes.limit(bytes.capacity()); rand.nextBytes(bytes.array()); return bytes; } @@ -166,16 +207,17 @@ public static void main(String[] args) throws Exception { System.out.println("Usage: RandomData [codec]"); System.exit(-1); } - Schema sch = new Schema.Parser().parse(new File(args[0])); - DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>()); - writer.setCodec(CodecFactory.fromString(args.length >= 4 ? args[3] : "null")); - writer.create(sch, new File(args[1])); - try { + Schema sch = new SchemaParser().parse(new File(args[0])).mainSchema(); + try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { + writer.setCodec(CodecFactory.fromString(args.length >= 4 ? args[3] : "null")); + writer.setMeta("user_metadata", "someByteArray".getBytes(StandardCharsets.UTF_8)); + File file = new File(args[1]); + Files.createDirectories(Paths.get(file.getParent())); + writer.create(sch, file); + for (Object datum : new RandomData(sch, Integer.parseInt(args[2]))) { writer.append(datum); } - } finally { - writer.close(); } } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/SchemaResolver.java b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaResolver.java new file mode 100644 index 00000000000..83285d371ae --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaResolver.java @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.AvroTypeException; +import org.apache.avro.Schema; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.EnumSet; +import java.util.IdentityHashMap; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; + +import static java.util.Objects.requireNonNull; +import static org.apache.avro.Schema.Type.ARRAY; +import static org.apache.avro.Schema.Type.ENUM; +import static org.apache.avro.Schema.Type.FIXED; +import static org.apache.avro.Schema.Type.MAP; +import static org.apache.avro.Schema.Type.RECORD; +import static org.apache.avro.Schema.Type.UNION; + +/** + * Utility class to resolve schemas that are unavailable at the point they are + * referenced in a schema file. This class is meant for internal use: use at + * your own risk! + */ +public final class SchemaResolver { + + private SchemaResolver() { + } + + private static final String UR_SCHEMA_ATTR = "org.apache.avro.idl.unresolved.name"; + + private static final String UR_SCHEMA_NAME = "UnresolvedSchema"; + + private static final String UR_SCHEMA_NS = "org.apache.avro.compiler"; + + private static final AtomicInteger COUNTER = new AtomicInteger(); + + /** + * Create a schema to represent an "unresolved" schema. (used to represent a + * schema whose definition does not exist, yet). + * + * @param name a schema name + * @return an unresolved schema for the given name + */ + public static Schema unresolvedSchema(final String name) { + Schema schema = Schema.createRecord(UR_SCHEMA_NAME + '_' + COUNTER.getAndIncrement(), "unresolved schema", + UR_SCHEMA_NS, false, Collections.emptyList()); + schema.addProp(UR_SCHEMA_ATTR, name); + return schema; + } + + /** + * Is this an unresolved schema. + * + * @param schema a schema + * @return whether the schema is an unresolved schema + */ + public static boolean isUnresolvedSchema(final Schema schema) { + return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null && schema.getName() != null + && schema.getName().startsWith(UR_SCHEMA_NAME) && UR_SCHEMA_NS.equals(schema.getNamespace())); + } + + /** + * Get the unresolved schema name. + * + * @param schema an unresolved schema + * @return the name of the unresolved schema + */ + public static String getUnresolvedSchemaName(final Schema schema) { + if (!isUnresolvedSchema(schema)) { + throw new IllegalArgumentException("Not a unresolved schema: " + schema); + } + return schema.getProp(UR_SCHEMA_ATTR); + } + + /** + * Is this an unresolved schema? + */ + public static boolean isFullyResolvedSchema(final Schema schema) { + if (isUnresolvedSchema(schema)) { + return false; + } else { + return Schemas.visit(schema, new IsResolvedSchemaVisitor()); + } + } + + /** + * This visitor checks if the current schema is fully resolved. + */ + public static final class IsResolvedSchemaVisitor implements SchemaVisitor { + boolean hasUnresolvedParts; + + IsResolvedSchemaVisitor() { + hasUnresolvedParts = false; + } + + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + hasUnresolvedParts = isUnresolvedSchema(terminal); + return hasUnresolvedParts ? SchemaVisitorAction.TERMINATE : SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(Schema nonTerminal) { + hasUnresolvedParts = isUnresolvedSchema(nonTerminal); + if (hasUnresolvedParts) { + return SchemaVisitorAction.TERMINATE; + } + if (nonTerminal.getType() == Schema.Type.RECORD && !nonTerminal.hasFields()) { + // We're still initializing the type... + return SchemaVisitorAction.SKIP_SUBTREE; + } + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal) { + return SchemaVisitorAction.CONTINUE; + } + + @Override + public Boolean get() { + return !hasUnresolvedParts; + } + } + + /** + * This visitor creates clone of the visited Schemata, minus the specified + * schema properties, and resolves all unresolved schemas. + */ + public static final class ResolvingVisitor implements SchemaVisitor { + private static final Set CONTAINER_SCHEMA_TYPES = EnumSet.of(RECORD, ARRAY, MAP, UNION); + private static final Set NAMED_SCHEMA_TYPES = EnumSet.of(RECORD, ENUM, FIXED); + + private final Function symbolTable; + private final IdentityHashMap replace; + + public ResolvingVisitor(final Function symbolTable) { + this.replace = new IdentityHashMap<>(); + this.symbolTable = symbolTable; + } + + @Override + public SchemaVisitorAction visitTerminal(final Schema terminal) { + Schema.Type type = terminal.getType(); + if (CONTAINER_SCHEMA_TYPES.contains(type)) { + if (!replace.containsKey(terminal)) { + throw new IllegalStateException("Schema " + terminal + " must be already processed"); + } + } else { + replace.put(terminal, terminal); + } + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(final Schema nt) { + Schema.Type type = nt.getType(); + if (type == RECORD && !replace.containsKey(nt)) { + if (isUnresolvedSchema(nt)) { + // unresolved schema will get a replacement that we already encountered, + // or we will attempt to resolve. + final String unresolvedSchemaName = getUnresolvedSchemaName(nt); + Schema resSchema = symbolTable.apply(unresolvedSchemaName); + if (resSchema == null) { + throw new AvroTypeException("Undefined schema: " + unresolvedSchemaName); + } + Schema replacement = replace.computeIfAbsent(resSchema, schema -> { + Schemas.visit(schema, this); + return replace.get(schema); // This is not what the visitor returns! + }); + replace.put(nt, replacement); + } else { + // Create a clone without fields or properties. They will be added in + // afterVisitNonTerminal, as they can both create circular references. + // (see org.apache.avro.TestCircularReferences as an example) + replace.put(nt, Schema.createRecord(nt.getName(), nt.getDoc(), nt.getNamespace(), nt.isError())); + } + } + return SchemaVisitorAction.CONTINUE; + } + + public void copyProperties(final Schema first, final Schema second) { + // Logical type + Optional.ofNullable(first.getLogicalType()).ifPresent(logicalType -> logicalType.addToSchema(second)); + + // Aliases (if applicable) + if (NAMED_SCHEMA_TYPES.contains(first.getType())) { + first.getAliases().forEach(second::addAlias); + } + + // Other properties + first.getObjectProps().forEach(second::addProp); + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(final Schema nt) { + Schema.Type type = nt.getType(); + Schema newSchema; + switch (type) { + case RECORD: + if (!isUnresolvedSchema(nt)) { + newSchema = replace.get(nt); + // Check if we've already handled the replacement schema with a + // reentrant call to visit(...) from within the visitor. + if (!newSchema.hasFields()) { + List fields = nt.getFields(); + List newFields = new ArrayList<>(fields.size()); + for (Schema.Field field : fields) { + newFields.add(new Schema.Field(field, replace.get(field.schema()))); + } + newSchema.setFields(newFields); + copyProperties(nt, newSchema); + } + } + return SchemaVisitorAction.CONTINUE; + case UNION: + List types = nt.getTypes(); + List newTypes = new ArrayList<>(types.size()); + for (Schema sch : types) { + newTypes.add(requireNonNull(replace.get(sch))); + } + newSchema = Schema.createUnion(newTypes); + break; + case ARRAY: + newSchema = Schema.createArray(requireNonNull(replace.get(nt.getElementType()))); + break; + case MAP: + newSchema = Schema.createMap(requireNonNull(replace.get(nt.getValueType()))); + break; + default: + throw new IllegalStateException("Illegal type " + type + ", schema " + nt); + } + copyProperties(nt, newSchema); + replace.put(nt, newSchema); + return SchemaVisitorAction.CONTINUE; + } + + @Override + public Void get() { + return null; + } + + public Schema getResolved(Schema schema) { + return requireNonNull(replace.get(schema), + () -> "Unknown schema: " + schema.getFullName() + ". Was it resolved before?"); + } + + @Override + public String toString() { + return "ResolvingVisitor{symbolTable=" + symbolTable + ", replace=" + replace + '}'; + } + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/SchemaUtil.java b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaUtil.java new file mode 100644 index 00000000000..0661981155d --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaUtil.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.Schema; + +import java.util.StringJoiner; + +public class SchemaUtil { + + private SchemaUtil() { + // utility class + } + + public static String describe(Schema schema) { + if (schema == null) { + return "unknown"; + } + switch (schema.getType()) { + case UNION: + StringJoiner csv = new StringJoiner(", "); + for (Schema branch : schema.getTypes()) { + csv.add(describe(branch)); + } + return "[" + csv + "]"; + case MAP: + return "Map"; + case ARRAY: + return "List<" + describe(schema.getElementType()) + ">"; + default: + return schema.getName(); + } + } + + public static String describe(Object datum) { + if (datum == null) { + return "null"; + } + return datum + " (a " + datum.getClass().getName() + ")"; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/SchemaVisitor.java b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaVisitor.java new file mode 100644 index 00000000000..1ac35baeda7 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaVisitor.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.Schema; + +public interface SchemaVisitor { + + /** + * Invoked for schemas that do not have "child" schemas (like string, int â€Ļ) or + * for a previously encountered schema with children, which will be treated as a + * terminal. (to avoid circular recursion) + */ + SchemaVisitorAction visitTerminal(Schema terminal); + + /** + * Invoked for schema with children before proceeding to visit the children. + */ + SchemaVisitorAction visitNonTerminal(Schema nonTerminal); + + /** + * Invoked for schemas with children after its children have been visited. + */ + SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal); + + /** + * Invoked when visiting is complete. + * + * @return a value that will be returned by the visit method. + */ + T get(); + + enum SchemaVisitorAction { + + /** + * continue visit. + */ + CONTINUE, + /** + * terminate visit. + */ + TERMINATE, + /** + * when returned from pre non terminal visit method the children of the non + * terminal are skipped. afterVisitNonTerminal for the current schema will not + * be invoked. + */ + SKIP_SUBTREE, + /** + * Skip visiting the siblings of this schema. + */ + SKIP_SIBLINGS + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Schemas.java b/lang/java/avro/src/main/java/org/apache/avro/util/Schemas.java new file mode 100644 index 00000000000..927a0c37b43 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/Schemas.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; + +import java.util.ArrayDeque; +import java.util.Collections; +import java.util.Deque; +import java.util.IdentityHashMap; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +/** + * Avro Schema utilities, to traverse... + */ +public final class Schemas { + + private Schemas() { + } + + /** + * Depth first visit. + */ + public static T visit(final Schema start, final SchemaVisitor visitor) { + // Set of Visited Schemas + IdentityHashMap visited = new IdentityHashMap<>(); + // Stack that contains the Schemas to process and afterVisitNonTerminal + // functions. + // Deque>> + // Using Either<...> has a cost we want to avoid... + Deque dq = new ArrayDeque<>(); + dq.push(start); + Object current; + while ((current = dq.poll()) != null) { + if (current instanceof Supplier) { + // We are executing a non-terminal post visit. + SchemaVisitor.SchemaVisitorAction action = ((Supplier) current).get(); + switch (action) { + case CONTINUE: + break; + case SKIP_SIBLINGS: + while (dq.peek() instanceof Schema) { + dq.remove(); + } + break; + case TERMINATE: + return visitor.get(); + case SKIP_SUBTREE: + default: + throw new UnsupportedOperationException("Invalid action " + action); + } + } else { + Schema schema = (Schema) current; + boolean terminate; + if (visited.containsKey(schema)) { + terminate = visitTerminal(visitor, schema, dq); + } else { + Schema.Type type = schema.getType(); + switch (type) { + case ARRAY: + terminate = visitNonTerminal(visitor, schema, dq, Collections.singleton(schema.getElementType())); + visited.put(schema, schema); + break; + case RECORD: + terminate = visitNonTerminal(visitor, schema, dq, () -> schema.getFields().stream().map(Field::schema) + .collect(Collectors.toCollection(ArrayDeque::new)).descendingIterator()); + visited.put(schema, schema); + break; + case UNION: + terminate = visitNonTerminal(visitor, schema, dq, schema.getTypes()); + visited.put(schema, schema); + break; + case MAP: + terminate = visitNonTerminal(visitor, schema, dq, Collections.singleton(schema.getValueType())); + visited.put(schema, schema); + break; + default: + terminate = visitTerminal(visitor, schema, dq); + break; + } + } + if (terminate) { + return visitor.get(); + } + } + } + return visitor.get(); + } + + private static boolean visitNonTerminal(final SchemaVisitor visitor, final Schema schema, final Deque dq, + final Iterable itSupp) { + SchemaVisitor.SchemaVisitorAction action = visitor.visitNonTerminal(schema); + switch (action) { + case CONTINUE: + dq.push((Supplier) () -> visitor.afterVisitNonTerminal(schema)); + itSupp.forEach(dq::push); + break; + case SKIP_SUBTREE: + dq.push((Supplier) () -> visitor.afterVisitNonTerminal(schema)); + break; + case SKIP_SIBLINGS: + while (dq.peek() instanceof Schema) { + dq.remove(); + } + break; + case TERMINATE: + return true; + default: + throw new UnsupportedOperationException("Invalid action " + action + " for " + schema); + } + return false; + } + + private static boolean visitTerminal(final SchemaVisitor visitor, final Schema schema, final Deque dq) { + SchemaVisitor.SchemaVisitorAction action = visitor.visitTerminal(schema); + switch (action) { + case CONTINUE: + break; + case SKIP_SIBLINGS: + while (dq.peek() instanceof Schema) { + dq.remove(); + } + break; + case TERMINATE: + return true; + case SKIP_SUBTREE: + default: + throw new UnsupportedOperationException("Invalid action " + action + " for " + schema); + } + return false; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/TimePeriod.java b/lang/java/avro/src/main/java/org/apache/avro/util/TimePeriod.java new file mode 100644 index 00000000000..89496744f30 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/TimePeriod.java @@ -0,0 +1,393 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import java.io.Serializable; +import java.time.DateTimeException; +import java.time.Duration; +import java.time.Period; +import java.time.chrono.ChronoPeriod; +import java.time.chrono.IsoChronology; +import java.time.temporal.ChronoUnit; +import java.time.temporal.Temporal; +import java.time.temporal.TemporalAmount; +import java.time.temporal.TemporalUnit; +import java.time.temporal.UnsupportedTemporalTypeException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +import static java.time.temporal.ChronoUnit.DAYS; +import static java.time.temporal.ChronoUnit.MILLIS; +import static java.time.temporal.ChronoUnit.MONTHS; +import static java.util.Collections.unmodifiableList; +import static java.util.Objects.requireNonNull; + +/** + *

    + * A temporal amount to model an {@link org.apache.avro.LogicalTypes.Duration + * Avro duration} (the logical type). + *

    + * + *

    + * It consists of a number of months, days and milliseconds, all modelled as an + * unsigned integer. + *

    + * + *

    + * Compared to {@link Period java.time.Period}, this class has a smaller range + * ('only' supporting a little less than 358 million years), and cannot support + * negative periods. + *

    + * + *

    + * Compared to {@link Duration java.time.Duration}, this class has less + * precision (milliseconds compared to nanoseconds), cannot support negative + * durations, and has a much smaller range. Where {@code java.time.Duration} + * supports fixed ranges up to about 68 years, {@code TimePeriod} can only + * handle about 49 days. + *

    + * + *

    + * Comparison with the regular {@code java.time} classes: + *

    + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
    TimePeriod{@link Period}{@link Duration}
    Precisionmillisecondsdaysnanoseconds
    Time range (approx.)0 &endash; 49 daysunsupported-68 &endash; 68 years
    Date range (approx.)0 to 370 million years-2.3 to 2.3 billion yearsunsupported
    + * + * @see Avro + * specification on duration + */ +public final class TimePeriod implements TemporalAmount, Serializable { + private static final long MAX_UNSIGNED_INT = 0xffffffffL; + private static final long MONTHS_PER_YEAR = 12; + private static final long MONTHS_PER_DECADE = MONTHS_PER_YEAR * 10; + private static final long MONTHS_PER_CENTURY = MONTHS_PER_DECADE * 10; + private static final long MONTHS_PER_MILLENNIUM = MONTHS_PER_CENTURY * 10; + private static final long MILLIS_PER_SECOND = 1_000; + private static final long MILLIS_PER_MINUTE = MILLIS_PER_SECOND * 60; + private static final long MILLIS_PER_HOUR = MILLIS_PER_MINUTE * 60; + private static final long MILLIS_IN_HALF_DAY = MILLIS_PER_HOUR * 12; + private static final long MICROS_PER_MILLI = 1_000; + private static final long NANOS_PER_MILLI = 1_000_000; + + private final long months; + private final long days; + private final long millis; + + /** + * Create a TimePeriod from another TemporalAmount, such as a {@link Period} or + * a {@link Duration}. + * + * @param amount a temporal amount + * @return the corresponding TimePeriod + */ + public static TimePeriod from(TemporalAmount amount) { + if (requireNonNull(amount, "amount") instanceof TimePeriod) { + return (TimePeriod) amount; + } + if (amount instanceof ChronoPeriod) { + if (!IsoChronology.INSTANCE.equals(((ChronoPeriod) amount).getChronology())) { + throw new DateTimeException("TimePeriod requires ISO chronology: " + amount); + } + } + long months = 0; + long days = 0; + long millis = 0; + for (TemporalUnit unit : amount.getUnits()) { + if (unit instanceof ChronoUnit) { + long unitAmount = amount.get(unit); + switch ((ChronoUnit) unit) { + case MILLENNIA: + months = unsignedInt(months + unitAmount * MONTHS_PER_MILLENNIUM); + break; + case CENTURIES: + months = unsignedInt(months + unitAmount * MONTHS_PER_CENTURY); + break; + case DECADES: + months = unsignedInt(months + unitAmount * MONTHS_PER_DECADE); + break; + case YEARS: + months = unsignedInt(months + unitAmount * MONTHS_PER_YEAR); + break; + case MONTHS: + months = unsignedInt(months + unitAmount); + break; + case WEEKS: + days = unsignedInt(days + unitAmount * 7); + break; + case DAYS: + days = unsignedInt(days + unitAmount); + break; + case HALF_DAYS: + days = unsignedInt(days + (unitAmount / 2)); // Truncates halves + if (unitAmount % 2 != 0) { + millis = unsignedInt(millis + MILLIS_IN_HALF_DAY); + } + break; + case HOURS: + millis = unsignedInt(millis + unitAmount * MILLIS_PER_HOUR); + break; + case MINUTES: + millis = unsignedInt(millis + unitAmount * MILLIS_PER_MINUTE); + break; + case SECONDS: + millis = unsignedInt(millis + unitAmount * MILLIS_PER_SECOND); + break; + case MILLIS: + millis = unsignedInt(millis + unitAmount); + break; + case MICROS: + if (unitAmount % MICROS_PER_MILLI != 0) { + throw new DateTimeException( + "Cannot add " + unitAmount + " microseconds: not a whole number of milliseconds"); + } + millis = unsignedInt(millis + unitAmount / MICROS_PER_MILLI); + break; + case NANOS: + if (unitAmount % NANOS_PER_MILLI != 0) { + throw new DateTimeException( + "Cannot add " + unitAmount + " nanoseconds: not a whole number of milliseconds"); + } + millis = unsignedInt(millis + unitAmount / NANOS_PER_MILLI); + break; + default: + throw new UnsupportedTemporalTypeException("Unsupported unit: " + unit); + } + } else { + throw new UnsupportedTemporalTypeException("Unsupported unit: " + unit); + } + } + return new TimePeriod(months, days, millis); + } + + /** + * Create a TimePeriod from a number of months, days and milliseconds + * + * @param months a number of months + * @param days a number of days + * @param millis a number of milliseconds + * @return the corresponding TimePeriod + * @throws ArithmeticException if any of the parameters does not fit an unsigned + * long (0..4294967296) + */ + public static TimePeriod of(long months, long days, long millis) { + return new TimePeriod(unsignedInt(months), unsignedInt(days), unsignedInt(millis)); + } + + private static long unsignedInt(long number) { + if (number != (number & MAX_UNSIGNED_INT)) { + throw new ArithmeticException("Overflow/underflow of unsigned int"); + } + return number; + } + + private TimePeriod(long months, long days, long millis) { + this.months = months; + this.days = days; + this.millis = millis; + } + + public Duration toDuration() { + return Duration.from(this); + } + + public Period toPeriod() { + if (isDateBased()) { + // We use unsigned ints, which have double the range of a signed int that + // Period uses. We can split months to years and months to ensure there's no + // overflow. But we cannot split days, as both days and months have varying + // lengths. + int yearsAsInt = (int) (months / MONTHS_PER_YEAR); + int monthsAsInt = (int) (months % MONTHS_PER_YEAR); + int daysAsInt = (int) days; + if (days != daysAsInt) { + throw new DateTimeException("Too many days: a Period can contain at most " + Integer.MAX_VALUE + " days."); + } + return Period.ofYears(yearsAsInt).withMonths(monthsAsInt).withDays(daysAsInt); + } + throw new DateTimeException("Cannot convert this TimePeriod to a Period: is not date based"); + } + + /** + * Determines if the TimePeriod is date based (i.e., if its milliseconds + * component is 0). + * + * @return {@code true} iff the TimePeriod is date based + */ + public boolean isDateBased() { + return millis == 0; + } + + /** + * Determines if the TimePeriod is time based (i.e., if its months and days + * components are 0). + * + * @return {@code true} iff the TimePeriod is time based + */ + public boolean isTimeBased() { + return months == 0 && days == 0; + } + + public long getMonths() { + return months; + } + + public long getDays() { + return days; + } + + public long getMillis() { + return millis; + } + + @Override + public long get(TemporalUnit unit) { + if (unit == MONTHS) { + return months; + } else if (unit == DAYS) { + return days; + } else if (unit == MILLIS) { + return millis; + } else { + throw new UnsupportedTemporalTypeException("Unsupported unit: " + unit); + } + } + + @Override + public List getUnits() { + List units = new ArrayList<>(); + // The zero-checks ensure compatibility with the Java Time classes Period and + // Duration where possible. + if (months != 0) { + units.add(MONTHS); + } + if (days != 0) { + units.add(DAYS); + } + if (millis != 0) { + units.add(MILLIS); + } + return unmodifiableList(units); + } + + @Override + public Temporal addTo(Temporal temporal) { + return addTo(temporal, months, days, millis); + } + + @Override + public Temporal subtractFrom(Temporal temporal) { + return addTo(temporal, -months, -days, -millis); + } + + private Temporal addTo(Temporal temporal, long months, long days, long millis) { + // The zero-checks ensure we can add a TimePeriod to a Temporal even when it + // does not support all fields, as long as the unsupported fields are zero. + if (months != 0) { + temporal = temporal.plus(months, MONTHS); + } + if (days != 0) { + temporal = temporal.plus(days, DAYS); + } + if (millis != 0) { + temporal = temporal.plus(millis, MILLIS); + } + return temporal; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TimePeriod that = (TimePeriod) o; + return months == that.months && days == that.days && millis == that.millis; + } + + @Override + public int hashCode() { + return Objects.hash(months, days, millis); + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + buffer.append("P"); + if (months != 0) { + int years = (int) (months / MONTHS_PER_YEAR); + int monthsLeft = (int) (months % MONTHS_PER_YEAR); + if (years != 0) { + buffer.append(years).append("Y"); + } + if (monthsLeft != 0) { + buffer.append(monthsLeft).append("M"); + } + } + if (days != 0 || (months == 0 && millis == 0)) { + buffer.append(days); + } + if (millis != 0) { + long millisLeft = millis; + int hours = (int) (millisLeft / MILLIS_PER_HOUR); + millisLeft -= MILLIS_PER_HOUR * hours; + int minutes = (int) (millisLeft / MILLIS_PER_MINUTE); + millisLeft -= MILLIS_PER_MINUTE * minutes; + int seconds = (int) (millisLeft / MILLIS_PER_SECOND); + millisLeft -= MILLIS_PER_SECOND * seconds; + if (millisLeft != 0) { + buffer.append(String.format("T%02d:%02d:%02d.%03d", hours, minutes, seconds, millisLeft)); + } else if (seconds != 0) { + buffer.append(String.format("T%02d:%02d:%02d", hours, minutes, seconds)); + } else if (minutes != 0) { + buffer.append(String.format("T%02d:%02d", hours, minutes)); + } else { + buffer.append(String.format("T%02d", hours)); + } + } + return buffer.toString(); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java index f54b6e2062b..a5c4ece29d7 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java @@ -24,9 +24,8 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; -import org.apache.avro.AvroRuntimeException; +import org.apache.avro.SystemLimitException; import org.apache.avro.io.BinaryData; -import org.slf4j.LoggerFactory; /** * A Utf8 string. Unlike {@link String}, instances are mutable. This is more @@ -34,22 +33,8 @@ * as a single instance may be reused. */ public class Utf8 implements Comparable, CharSequence, Externalizable { - private static final String MAX_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength"; - private static final int MAX_LENGTH; - private static final byte[] EMPTY = new byte[0]; - static { - String o = System.getProperty(MAX_LENGTH_PROPERTY); - int i = Integer.MAX_VALUE; - if (o != null) { - try { - i = Integer.parseUnsignedInt(o); - } catch (NumberFormatException nfe) { - LoggerFactory.getLogger(Utf8.class).warn("Could not parse property " + MAX_LENGTH_PROPERTY + ": " + o, nfe); - } - } - MAX_LENGTH = i; - } + private static final byte[] EMPTY = new byte[0]; private byte[] bytes; private int hash; @@ -57,13 +42,14 @@ public class Utf8 implements Comparable, CharSequence, Externalizable { private String string; public Utf8() { - bytes = EMPTY; + this.bytes = EMPTY; + this.hash = 1; } public Utf8(String string) { byte[] bytes = getBytesFor(string); int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; this.string = string; @@ -78,26 +64,26 @@ public Utf8(Utf8 other) { public Utf8(byte[] bytes) { int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; } - /** - * Return UTF-8 encoded bytes. Only valid through {@link #getByteLength()}. - */ - public byte[] getBytes() { - return bytes; + Utf8(String string, int length) { + this(string); + this.length = length; } /** - * Return length in bytes. + * Return UTF-8 encoded bytes. Only valid through {@link #getByteLength()} + * assuming the bytes have been fully copied into the underlying buffer from the + * source. * - * @deprecated call {@link #getByteLength()} instead. + * @see #setByteLength(int) + * @return a reference to the underlying byte array */ - @Deprecated - public int getLength() { - return length; + public byte[] getBytes() { + return bytes; } /** Return length in bytes. */ @@ -106,24 +92,27 @@ public int getByteLength() { } /** - * Set length in bytes. Should called whenever byte content changes, even if the - * length does not change, as this also clears the cached String. + * Set length in bytes. When calling this method, even if the new length is the + * same as the current length, the cached contents of this Utf8 object will be + * wiped out. After calling this method, no assumptions should be made about the + * internal state (e.g., contents, hashcode, equality, etc.) of this Utf8 String + * other than the internal buffer being large enough to accommodate a String of + * the new length. This should be called immediately before reading a String + * from the underlying data source. * - * @deprecated call {@link #setByteLength(int)} instead. - */ - @Deprecated - public Utf8 setLength(int newLength) { - return setByteLength(newLength); - } - - /** - * Set length in bytes. Should called whenever byte content changes, even if the - * length does not change, as this also clears the cached String. + * @param newLength the new length of the underlying buffer + * @return a reference to this object. + * @see org.apache.avro.io.BinaryDecoder#readString(Utf8) */ public Utf8 setByteLength(int newLength) { - checkLength(newLength); + SystemLimitException.checkMaxStringLength(newLength); + + // Note that if the buffer size increases, the internal buffer is zero-ed out. + // If the buffer is large enough, just the length pointer moves and the old + // contents remain. For consistency's sake, we could zero-out the buffer in + // both cases, but would be a perf hit. if (this.bytes.length < newLength) { - this.bytes = Arrays.copyOf(this.bytes, newLength); + this.bytes = new byte[newLength]; } this.length = newLength; this.string = null; @@ -135,7 +124,7 @@ public Utf8 setByteLength(int newLength) { public Utf8 set(String string) { byte[] bytes = getBytesFor(string); int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; this.string = string; @@ -173,6 +162,10 @@ public boolean equals(Object o) { Utf8 that = (Utf8) o; if (!(this.length == that.length)) return false; + // For longer strings, leverage vectorization (JDK 9+) to determine equality + // For shorter strings, the overhead of this method defeats the value + if (this.length > 7) + return Arrays.equals(this.bytes, 0, this.length, that.bytes, 0, that.length); byte[] thatBytes = that.bytes; for (int i = 0; i < this.length; i++) if (bytes[i] != thatBytes[i]) @@ -186,8 +179,15 @@ public int hashCode() { if (h == 0) { byte[] bytes = this.bytes; int length = this.length; - for (int i = 0; i < length; i++) { - h = h * 31 + bytes[i]; + // If the array is filled, use the underlying JDK hash functionality. + // Starting with JDK 21, the underlying implementation is vectorized. + if (length > 7 && bytes.length == length) { + h = Arrays.hashCode(bytes); + } else { + h = 1; + for (int i = 0; i < length; i++) { + h = h * 31 + bytes[i]; + } } this.hash = h; } @@ -215,12 +215,6 @@ public CharSequence subSequence(int start, int end) { return toString().subSequence(start, end); } - private static void checkLength(int length) { - if (length > MAX_LENGTH) { - throw new AvroRuntimeException("String length " + length + " exceeds maximum allowed"); - } - } - /** Gets the UTF-8 bytes for a String */ public static byte[] getBytesFor(String str) { return str.getBytes(StandardCharsets.UTF_8); @@ -237,4 +231,28 @@ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundExcept setByteLength(in.readInt()); in.readFully(bytes); } + + public static int compareSequences(CharSequence cs1, CharSequence cs2) { + if (cs1 == cs2) { + return 0; + } + + if (cs1 == null || cs2 == null) { + return cs1 == null ? 1 : -1; + } + + if (cs1.getClass() == cs2.getClass() && cs1 instanceof Comparable) { + return ((Comparable) cs1).compareTo(cs2); + } + + for (int i = 0, len = Math.min(cs1.length(), cs2.length()); i < len; i++) { + char a = cs1.charAt(i); + char b = cs2.charAt(i); + if (a != b) { + return a - b; + } + } + + return cs1.length() - cs2.length(); + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/UtfTextUtils.java b/lang/java/avro/src/main/java/org/apache/avro/util/UtfTextUtils.java new file mode 100644 index 00000000000..967a48bf364 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/UtfTextUtils.java @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +/** + * Text utilities especially suited for UTF encoded bytes. + * + *

    + * When the character set is unknown, methods in this class assume UTF encoded + * text and try to detect the UTF variant (8/16/32 bits, big/little endian), + * using the BOM (if present) or an educated guess assuming the first character + * is in the range U+0000-U+00FF. This heuristic works for all latin text based + * formats, which includes Avro IDL, JSON, XML, etc. If the heuristic fails, + * UTF-8 is assumed. + *

    + * + * @see XML specification, + * appendix F: Autodetection of Character Encodings (Non-Normative) + */ +public class UtfTextUtils { + private static final int TRANSFER_BUFFER_SIZE = 4096; + /** + * JVM standard character set (but that doesn't have a constant in + * {@link StandardCharsets}) for UTF-32. + */ + private static final Charset UTF_32 = Charset.forName("UTF-32"); + /** + * JVM standard character set (but that doesn't have a constant in + * {@link StandardCharsets}) for UTF-32BE. + */ + private static final Charset UTF_32BE = Charset.forName("UTF-32BE"); + /** + * JVM standard character set (but that doesn't have a constant in + * {@link StandardCharsets}) for UTF-32LE. + */ + private static final Charset UTF_32LE = Charset.forName("UTF-32LE"); + + public static String asString(byte[] bytes, Charset charset) { + if (charset == null) { + charset = detectUtfCharset(bytes); + } + return skipBOM(new String(bytes, charset)); + } + + /** + * Reads the specified input stream as text. If {@code charset} is {@code null}, + * the method will assume UTF encoded text and attempt to detect the appropriate + * charset. + * + * @param input the input to read + * @param charset the character set of the input, if known + * @return all bytes, read into a string + * @throws IOException when reading the input fails for some reason + */ + public static String readAllBytes(InputStream input, Charset charset) throws IOException { + if (charset == null) { + input = ensureMarkSupport(input); + input.mark(4); + byte[] buffer = new byte[4]; + int bytesRead = fillBuffer(input, buffer); + input.reset(); + + charset = detectUtfCharset0(buffer, bytesRead); + + if (charset == null) { + throw new IOException("Unsupported UCS-4 variant (neither UTF-32BE nor UTF32-LE)"); + } + } + Reader reader = new InputStreamReader(input, charset); + return readAllChars(reader); + } + + private static InputStream ensureMarkSupport(InputStream input) { + if (input.markSupported()) { + return input; + } else { + return new BufferedInputStream(input); + } + } + + private static int fillBuffer(InputStream in, byte[] buf) throws IOException { + int remaining = buf.length; + int offset = 0; + while (remaining > 0) { + int bytesRead = in.read(buf, offset, remaining); + // As remaining > 0, bytesRead is either -1 or positive + if (bytesRead == -1) { + break; + } + offset += bytesRead; + remaining -= bytesRead; + } + return offset; + } + + public static String readAllChars(Reader input) throws IOException { + StringBuilder buffer = new StringBuilder(); + char[] charBuffer = new char[TRANSFER_BUFFER_SIZE]; + int charsRead; + while ((charsRead = input.read(charBuffer, 0, TRANSFER_BUFFER_SIZE)) >= 0) { + buffer.append(charBuffer, 0, charsRead); + } + return skipBOM(buffer); + } + + private static String skipBOM(CharSequence buffer) { + if (buffer.charAt(0) == '\uFEFF') { + return buffer.subSequence(1, buffer.length()).toString(); + } + return buffer.toString(); + } + + /** + * Assuming UTF encoded bytes, detect the UTF variant (8/16/32 bits, big/little + * endian). + * + *

    + * To ensure the most accurate detection, the algorithm requires at least 4 + * bytes. One should only provide less than 4 bytes of data if that is all there + * is. + *

    + * + *

    + * Detection is certain when a byte order mark (BOM) is used. Otherwise a + * heuristic is used, which works when the first character is from the first 256 + * characters from the BMP (U+0000-U+00FF). This works for all latin-based + * textual formats, like Avro IDL, JSON, YAML, XML, etc. + *

    + * + * @param firstFewBytes the first few bytes of the text to detect the character + * set of + * @return the character set to use + */ + public static Charset detectUtfCharset(byte[] firstFewBytes) { + Charset detectedCharset = detectUtfCharset0(firstFewBytes, firstFewBytes.length); + if (detectedCharset == null) { + throw new IllegalArgumentException("Unsupported UCS-4 variant (neither UTF-32BE nor UTF32-LE)"); + } + return detectedCharset; + } + + private static Charset detectUtfCharset0(byte[] firstFewBytes, int numBytes) { + // spotless:off + /* + * Lookup table, adapted from https://www.w3.org/TR/xml/#sec-guessing + * It omits non-UTF encodings (the 2nd and 3rd rows from the end). + * Note that the order (with respect to UTF-32 & UTF-16) is important! + * + * (the non-zero bytes encode the byte order mark, BOM) + * + * Match the 'magic bytes' in order, and take the first match: + * 00 00 FE FF -> UTF-32 (be) + * FF FE 00 00 -> UTF-32 (le) + * 00 00 FF FE -> unsupported UCS-4 (byte order 2143) + * FE FF 00 00 -> unsupported UCS-4 (byte order 3412) + * FE FF __ __ -> UTF-16 (be) + * FF FE __ __ -> UTF-16 (le) + * EF BB BF __ -> UTF-8 + * 00 00 00 __ -> UTF-32BE + * __ 00 00 00 -> UTF-32LE + * 00 00 __ 00 -> unsupported UCS-4 (byte order 2143) + * 00 __ 00 00 -> unsupported UCS-4 (byte order 3412) + * 00 __ __ __ -> UTF-16BE + * __ 00 __ __ -> UTF-16LE + * __ __ __ __ -> UTF-8 (fallback) + */ + // spotless:on + int quad = quad(firstFewBytes, numBytes); + int word = quad >>> 16; + if (numBytes > 3 && (quad == 0x0000FEFF || quad == 0xFFFE0000)) { + // With BOM: UTF-32 (Charset handles BOM & endianness) + return UTF_32; + } else if (numBytes > 3 && (quad == 0x0000FFFE || quad == 0xFEFF0000)) { + // With BOM: unsupported UCS-4 encoding (byte order 2143 resp. 3412) + return null; + } else if (numBytes > 1 && (word == 0xFEFF || word == 0xFFFE)) { + // With BOM: UTF-16 (Charset handles BOM & endianness) + return StandardCharsets.UTF_16; + } else if (numBytes > 2 && quad >>> 8 == 0xEFBBBF) { + // With BOM: UTF-8 (Charset does not handle a BOM, so our caller must skip it) + return StandardCharsets.UTF_8; + } else if (numBytes > 3 && (quad & 0xFFFFFF00) == 0) { + // Without BOM (i.e., a guess) + return UTF_32BE; + } else if (numBytes > 3 && (quad & 0x00FFFFFF) == 0) { + // Without BOM (i.e., a guess) + return UTF_32LE; + } else if (numBytes > 3 && (quad & 0xFFFF00FF) == 0 || (quad & 0xFF00FFFF) == 0) { + // Without BOM (i.e., a guess): unsupported UCS-4 encoding (byte order 2143 + // resp. 3412) + return null; + } else if (numBytes > 1 && (word & 0xFF00) == 0) { + // Without BOM (i.e., a guess) + return StandardCharsets.UTF_16BE; + } else if (numBytes > 1 && (word & 0x00FF) == 0) { + // Without BOM (i.e., a guess) + return StandardCharsets.UTF_16LE; + } else { + // Fallback + return StandardCharsets.UTF_8; + } + } + + private static int quad(byte[] bytes, int length) { + int quad = 0xFFFFFFFF; + switch (length) { + default: + quad = (quad & 0xFFFFFF00) | (bytes[3] & 0xFF); + // Fallthrough + case 3: + quad = (quad & 0xFFFF00FF) | (bytes[2] & 0xFF) << 8; + // Fallthrough + case 2: + quad = (quad & 0xFF00FFFF) | (bytes[1] & 0xFF) << 16; + // Fallthrough + case 1: + quad = (quad & 0x00FFFFFF) | (bytes[0] & 0xFF) << 24; + // Fallthrough + case 0: + break; + } + return quad; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java b/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java index a57cb49ac13..565d8e7ed36 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java @@ -22,10 +22,10 @@ import java.lang.ref.WeakReference; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; /** * Implements a combination of WeakHashMap and IdentityHashMap. Useful for @@ -41,7 +41,7 @@ */ public class WeakIdentityHashMap implements Map { private final ReferenceQueue queue = new ReferenceQueue<>(); - private Map backingStore = new HashMap<>(); + private Map backingStore = new ConcurrentHashMap<>(); public WeakIdentityHashMap() { } diff --git a/lang/java/ipc-jetty/src/main/java/org/apache/avro/ipc/jetty/StaticServlet.java b/lang/java/avro/src/main/java/org/apache/avro/util/internal/ClassValueCache.java similarity index 51% rename from lang/java/ipc-jetty/src/main/java/org/apache/avro/ipc/jetty/StaticServlet.java rename to lang/java/avro/src/main/java/org/apache/avro/util/internal/ClassValueCache.java index 2e28ba14eba..25cc3fb1271 100644 --- a/lang/java/ipc-jetty/src/main/java/org/apache/avro/ipc/jetty/StaticServlet.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/internal/ClassValueCache.java @@ -15,31 +15,33 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.avro.util.internal; -package org.apache.avro.ipc.jetty; - -import java.net.URL; - -import org.eclipse.jetty.servlet.DefaultServlet; -import org.eclipse.jetty.util.resource.Resource; +import java.util.function.Function; /** - * Very simple servlet class capable of serving static files. + * Wraps a {@link ClassValue} cache so it can be overridden in an android + * environment, where it isn't available. + * + * @param Return type of the ClassValue */ -public class StaticServlet extends DefaultServlet { - private static final long serialVersionUID = 1L; +public class ClassValueCache implements Function, R> { - @Override - public Resource getResource(String pathInContext) { - // Take only last slice of the URL as a filename, so we can adjust path. - // This also prevents mischief like '../../foo.css' - String[] parts = pathInContext.split("/"); - String filename = parts[parts.length - 1]; + private final Function, R> ifAbsent; - URL resource = getClass().getClassLoader().getResource("org/apache/avro/ipc/stats/static/" + filename); - if (resource == null) { - return null; + private final ClassValue cache = new ClassValue() { + @Override + protected R computeValue(Class c) { + return ifAbsent.apply(c); } - return Resource.newResource(resource); + }; + + public ClassValueCache(Function, R> ifAbsent) { + this.ifAbsent = ifAbsent; + } + + @Override + public R apply(Class c) { + return cache.get(c); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java b/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java index 1a822899f97..c085997aab8 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java @@ -40,6 +40,18 @@ import com.fasterxml.jackson.databind.util.TokenBuffer; public class JacksonUtils { + /** + * Object Mapper used for toJsonNode method. + */ + private static final ObjectMapper MAPPER = new ObjectMapper(); + + /** + * This object mapper uses a special variant that has different visibility + * rules, used in objectToMap method. + */ + private static final ObjectMapper OBJECT_TO_MAP_MAPPER = MAPPER.copy() + .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE) + .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); private JacksonUtils() { } @@ -49,9 +61,9 @@ public static JsonNode toJsonNode(Object datum) { return null; } try { - TokenBuffer generator = new TokenBuffer(new ObjectMapper(), false); + TokenBuffer generator = new TokenBuffer(MAPPER, false); toJson(datum, generator); - return new ObjectMapper().readTree(generator.asParser()); + return MAPPER.readTree(generator.asParser()); } catch (IOException e) { throw new AvroRuntimeException(e); } @@ -102,14 +114,26 @@ public static Object toObject(JsonNode jsonNode) { } public static Object toObject(JsonNode jsonNode, Schema schema) { - if (schema != null && schema.getType().equals(Schema.Type.UNION)) { - return toObject(jsonNode, schema.getTypes().get(0)); - } if (jsonNode == null) { return null; } else if (jsonNode.isNull()) { return JsonProperties.NULL_VALUE; - } else if (jsonNode.isBoolean()) { + } + + if (schema != null && schema.getType().equals(Schema.Type.UNION)) { + for (Schema unionType : schema.getTypes()) { + if (unionType.getType().equals(Schema.Type.NULL)) { + continue; + } + Object unionObject = toObject(jsonNode, unionType); + if (unionObject != null) { + return unionObject; + } + } + return null; + } + + if (jsonNode.isBoolean()) { return jsonNode.asBoolean(); } else if (jsonNode.isInt()) { if (schema == null || schema.getType().equals(Schema.Type.INT)) { @@ -175,15 +199,11 @@ public static Object toObject(JsonNode jsonNode, Schema schema) { /** * Convert an object into a map - * + * * @param datum The object * @return Its Map representation */ public static Map objectToMap(Object datum) { - ObjectMapper mapper = new ObjectMapper(); - // we only care about fields - mapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE); - mapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); - return mapper.convertValue(datum, Map.class); + return OBJECT_TO_MAP_MAPPER.convertValue(datum, Map.class); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/internal/ThreadLocalWithInitial.java b/lang/java/avro/src/main/java/org/apache/avro/util/internal/ThreadLocalWithInitial.java new file mode 100644 index 00000000000..a49267b0c97 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/internal/ThreadLocalWithInitial.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util.internal; + +import java.util.function.Supplier; + +/** + * Wraps a {@link ThreadLocal#withInitial(Supplier)} so it can be overridden in + * an android environment, where this method is not available until API 26. + */ +public class ThreadLocalWithInitial { + + /** Delegate a ThreadLocal instance with the supplier. */ + public static ThreadLocal of(Supplier supplier) { + return ThreadLocal.withInitial(supplier); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/springframework/Assert.java b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/Assert.java new file mode 100644 index 00000000000..70e2e9f3b30 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/Assert.java @@ -0,0 +1,121 @@ +/* + * Copyright 2002-2020 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +/** + * Assertion utility class that assists in validating arguments. + * + *

    + * Useful for identifying programmer errors early and clearly at runtime. + * + *

    + * For example, if the contract of a public method states it does not allow + * {@code null} arguments, {@code Assert} can be used to validate that contract. + * Doing this clearly indicates a contract violation when it occurs and protects + * the class's invariants. + * + *

    + * Typically used to validate method arguments rather than configuration + * properties, to check for cases that are usually programmer errors rather than + * configuration errors. In contrast to configuration initialization code, there + * is usually no point in falling back to defaults in such methods. + * + *

    + * This class is similar to JUnit's assertion library. If an argument value is + * deemed invalid, an {@link IllegalArgumentException} is thrown (typically). + * For example: + * + *

    + * Assert.notNull(clazz, "The class must not be null");
    + * Assert.isTrue(i > 0, "The value must be greater than zero");
    + * 
    + * + *

    + * Mainly for internal use within the framework; for a more comprehensive suite + * of assertion utilities consider {@code org.apache.commons.lang3.Validate} + * from Apache Commons + * Lang, Google Guava's Preconditions, + * or similar third-party libraries. + * + * @author Keith Donald + * @author Juergen Hoeller + * @author Sam Brannen + * @author Colin Sampaleanu + * @author Rob Harrop + * @since 1.1.2 + */ +class Assert { + private Assert() { + } + + /** + * Assert a boolean expression, throwing an {@code IllegalStateException} if the + * expression evaluates to {@code false}. + * + *

    +   * Assert.state(id == null, "The id property must not already be initialized");
    +   * 
    + * + * @param expression a boolean expression + * @param message the exception message to use if the assertion fails + * @throws IllegalStateException if {@code expression} is {@code false} + */ + public static void state(boolean expression, String message) { + if (!expression) { + throw new IllegalStateException(message); + } + } + + /** + * Assert a boolean expression, throwing an {@code IllegalArgumentException} if + * the expression evaluates to {@code false}. + * + *
    +   * Assert.isTrue(i > 0, "The value must be greater than zero");
    +   * 
    + * + * @param expression a boolean expression + * @param message the exception message to use if the assertion fails + * @throws IllegalArgumentException if {@code expression} is {@code false} + */ + public static void isTrue(boolean expression, String message) { + if (!expression) { + throw new IllegalArgumentException(message); + } + } + + /** + * Assert that an object is not {@code null}. + * + *
    +   * Assert.notNull(clazz, "The class must not be null");
    +   * 
    + * + * @param object the object to check + * @param message the exception message to use if the assertion fails + * @throws IllegalArgumentException if the object is {@code null} + */ + public static void notNull(@Nullable Object object, String message) { + if (object == null) { + throw new IllegalArgumentException(message); + } + } + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ConcurrentReferenceHashMap.java b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ConcurrentReferenceHashMap.java new file mode 100644 index 00000000000..1a137cf2101 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ConcurrentReferenceHashMap.java @@ -0,0 +1,1111 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +import java.lang.ref.ReferenceQueue; +import java.lang.ref.SoftReference; +import java.lang.ref.WeakReference; +import java.lang.reflect.Array; +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.ReentrantLock; + +/** + * A {@link ConcurrentHashMap} that uses {@link ReferenceType#SOFT soft} or + * {@linkplain ReferenceType#WEAK weak} references for both {@code keys} and + * {@code values}. + * + *

    + * This class can be used as an alternative to + * {@code Collections.synchronizedMap(new WeakHashMap>())} in + * order to support better performance when accessed concurrently. This + * implementation follows the same design constraints as + * {@link ConcurrentHashMap} with the exception that {@code null} values and + * {@code null} keys are supported. + * + *

    + * NOTE: The use of references means that there is no guarantee that + * items placed into the map will be subsequently available. The garbage + * collector may discard references at any time, so it may appear that an + * unknown thread is silently removing entries. + * + *

    + * If not explicitly specified, this implementation will use + * {@linkplain SoftReference soft entry references}. + * + * @param the key type + * @param the value type + * @author Phillip Webb + * @author Juergen Hoeller + * @since 3.2 + */ +public class ConcurrentReferenceHashMap extends AbstractMap implements ConcurrentMap { + + private static final int DEFAULT_INITIAL_CAPACITY = 16; + + private static final float DEFAULT_LOAD_FACTOR = 0.75f; + + private static final int DEFAULT_CONCURRENCY_LEVEL = 16; + + private static final ReferenceType DEFAULT_REFERENCE_TYPE = ReferenceType.SOFT; + + private static final int MAXIMUM_CONCURRENCY_LEVEL = 1 << 16; + + private static final int MAXIMUM_SEGMENT_SIZE = 1 << 30; + + /** + * Array of segments indexed using the high order bits from the hash. + */ + private final Segment[] segments; + + /** + * When the average number of references per table exceeds this value resize + * will be attempted. + */ + private final float loadFactor; + + /** + * The reference type: SOFT or WEAK. + */ + private final ReferenceType referenceType; + + /** + * The shift value used to calculate the size of the segments array and an index + * from the hash. + */ + private final int shift; + + /** + * Late binding entry set. + */ + @Nullable + private volatile Set> entrySet; + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + */ + public ConcurrentReferenceHashMap() { + this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + */ + public ConcurrentReferenceHashMap(int initialCapacity) { + this(initialCapacity, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param loadFactor the load factor. When the average number of references + * per table exceeds this value resize will be attempted + */ + public ConcurrentReferenceHashMap(int initialCapacity, float loadFactor) { + this(initialCapacity, loadFactor, DEFAULT_CONCURRENCY_LEVEL, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param concurrencyLevel the expected number of threads that will concurrently + * write to the map + */ + public ConcurrentReferenceHashMap(int initialCapacity, int concurrencyLevel) { + this(initialCapacity, DEFAULT_LOAD_FACTOR, concurrencyLevel, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param referenceType the reference type used for entries (soft or weak) + */ + public ConcurrentReferenceHashMap(int initialCapacity, ReferenceType referenceType) { + this(initialCapacity, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL, referenceType); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param loadFactor the load factor. When the average number of + * references per table exceeds this value, resize will + * be attempted. + * @param concurrencyLevel the expected number of threads that will concurrently + * write to the map + */ + public ConcurrentReferenceHashMap(int initialCapacity, float loadFactor, int concurrencyLevel) { + this(initialCapacity, loadFactor, concurrencyLevel, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param loadFactor the load factor. When the average number of + * references per table exceeds this value, resize will + * be attempted. + * @param concurrencyLevel the expected number of threads that will concurrently + * write to the map + * @param referenceType the reference type used for entries (soft or weak) + */ + @SuppressWarnings("unchecked") + public ConcurrentReferenceHashMap(int initialCapacity, float loadFactor, int concurrencyLevel, + ReferenceType referenceType) { + + Assert.isTrue(initialCapacity >= 0, "Initial capacity must not be negative"); + Assert.isTrue(loadFactor > 0f, "Load factor must be positive"); + Assert.isTrue(concurrencyLevel > 0, "Concurrency level must be positive"); + Assert.notNull(referenceType, "Reference type must not be null"); + this.loadFactor = loadFactor; + this.shift = calculateShift(concurrencyLevel, MAXIMUM_CONCURRENCY_LEVEL); + int size = 1 << this.shift; + this.referenceType = referenceType; + int roundedUpSegmentCapacity = (int) ((initialCapacity + size - 1L) / size); + int initialSize = 1 << calculateShift(roundedUpSegmentCapacity, MAXIMUM_SEGMENT_SIZE); + Segment[] segments = (Segment[]) Array.newInstance(Segment.class, size); + int resizeThreshold = (int) (initialSize * getLoadFactor()); + for (int i = 0; i < segments.length; i++) { + segments[i] = new Segment(initialSize, resizeThreshold); + } + this.segments = segments; + } + + protected final float getLoadFactor() { + return this.loadFactor; + } + + protected final int getSegmentsSize() { + return this.segments.length; + } + + protected final Segment getSegment(int index) { + return this.segments[index]; + } + + /** + * Factory method that returns the {@link ReferenceManager}. This method will be + * called once for each {@link Segment}. + * + * @return a new reference manager + */ + protected ReferenceManager createReferenceManager() { + return new ReferenceManager(); + } + + /** + * Get the hash for a given object, apply an additional hash function to reduce + * collisions. This implementation uses the same Wang/Jenkins algorithm as + * {@link ConcurrentHashMap}. Subclasses can override to provide alternative + * hashing. + * + * @param o the object to hash (may be null) + * @return the resulting hash code + */ + protected int getHash(@Nullable Object o) { + int hash = (o != null ? o.hashCode() : 0); + hash += (hash << 15) ^ 0xffffcd7d; + hash ^= (hash >>> 10); + hash += (hash << 3); + hash ^= (hash >>> 6); + hash += (hash << 2) + (hash << 14); + hash ^= (hash >>> 16); + return hash; + } + + @Override + @Nullable + public V get(@Nullable Object key) { + Reference ref = getReference(key, Restructure.WHEN_NECESSARY); + Entry entry = (ref != null ? ref.get() : null); + return (entry != null ? entry.getValue() : null); + } + + @Override + @Nullable + public V getOrDefault(@Nullable Object key, @Nullable V defaultValue) { + Reference ref = getReference(key, Restructure.WHEN_NECESSARY); + Entry entry = (ref != null ? ref.get() : null); + return (entry != null ? entry.getValue() : defaultValue); + } + + @Override + public boolean containsKey(@Nullable Object key) { + Reference ref = getReference(key, Restructure.WHEN_NECESSARY); + Entry entry = (ref != null ? ref.get() : null); + return (entry != null && ObjectUtils.nullSafeEquals(entry.getKey(), key)); + } + + /** + * Return a {@link Reference} to the {@link Entry} for the specified + * {@code key}, or {@code null} if not found. + * + * @param key the key (can be {@code null}) + * @param restructure types of restructure allowed during this call + * @return the reference, or {@code null} if not found + */ + @Nullable + protected final Reference getReference(@Nullable Object key, Restructure restructure) { + int hash = getHash(key); + return getSegmentForHash(hash).getReference(key, hash, restructure); + } + + @Override + @Nullable + public V put(@Nullable K key, @Nullable V value) { + return put(key, value, true); + } + + @Override + @Nullable + public V putIfAbsent(@Nullable K key, @Nullable V value) { + return put(key, value, false); + } + + @Nullable + private V put(@Nullable final K key, @Nullable final V value, final boolean overwriteExisting) { + return doTask(key, new Task(TaskOption.RESTRUCTURE_BEFORE, TaskOption.RESIZE) { + @Override + @Nullable + protected V execute(@Nullable Reference ref, @Nullable Entry entry, @Nullable Entries entries) { + if (entry != null) { + V oldValue = entry.getValue(); + if (overwriteExisting) { + entry.setValue(value); + } + return oldValue; + } + Assert.state(entries != null, "No entries segment"); + entries.add(value); + return null; + } + }); + } + + @Override + @Nullable + public V remove(@Nullable Object key) { + return doTask(key, new Task(TaskOption.RESTRUCTURE_AFTER, TaskOption.SKIP_IF_EMPTY) { + @Override + @Nullable + protected V execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null) { + if (ref != null) { + ref.release(); + } + return entry.value; + } + return null; + } + }); + } + + @Override + public boolean remove(@Nullable Object key, final @Nullable Object value) { + Boolean result = doTask(key, new Task(TaskOption.RESTRUCTURE_AFTER, TaskOption.SKIP_IF_EMPTY) { + @Override + protected Boolean execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null && ObjectUtils.nullSafeEquals(entry.getValue(), value)) { + if (ref != null) { + ref.release(); + } + return true; + } + return false; + } + }); + return (Boolean.TRUE.equals(result)); + } + + @Override + public boolean replace(@Nullable K key, final @Nullable V oldValue, final @Nullable V newValue) { + Boolean result = doTask(key, new Task(TaskOption.RESTRUCTURE_BEFORE, TaskOption.SKIP_IF_EMPTY) { + @Override + protected Boolean execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null && ObjectUtils.nullSafeEquals(entry.getValue(), oldValue)) { + entry.setValue(newValue); + return true; + } + return false; + } + }); + return (Boolean.TRUE.equals(result)); + } + + @Override + @Nullable + public V replace(@Nullable K key, final @Nullable V value) { + return doTask(key, new Task(TaskOption.RESTRUCTURE_BEFORE, TaskOption.SKIP_IF_EMPTY) { + @Override + @Nullable + protected V execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null) { + V oldValue = entry.getValue(); + entry.setValue(value); + return oldValue; + } + return null; + } + }); + } + + @Override + public void clear() { + for (Segment segment : this.segments) { + segment.clear(); + } + } + + /** + * Remove any entries that have been garbage collected and are no longer + * referenced. Under normal circumstances garbage collected entries are + * automatically purged as items are added or removed from the Map. This method + * can be used to force a purge, and is useful when the Map is read frequently + * but updated less often. + */ + public void purgeUnreferencedEntries() { + for (Segment segment : this.segments) { + segment.restructureIfNecessary(false); + } + } + + @Override + public int size() { + int size = 0; + for (Segment segment : this.segments) { + size += segment.getCount(); + } + return size; + } + + @Override + public boolean isEmpty() { + for (Segment segment : this.segments) { + if (segment.getCount() > 0) { + return false; + } + } + return true; + } + + @Override + public Set> entrySet() { + Set> entrySet = this.entrySet; + if (entrySet == null) { + entrySet = new EntrySet(); + this.entrySet = entrySet; + } + return entrySet; + } + + @Nullable + private T doTask(@Nullable Object key, Task task) { + int hash = getHash(key); + return getSegmentForHash(hash).doTask(hash, key, task); + } + + private Segment getSegmentForHash(int hash) { + return this.segments[(hash >>> (32 - this.shift)) & (this.segments.length - 1)]; + } + + /** + * Calculate a shift value that can be used to create a power-of-two value + * between the specified maximum and minimum values. + * + * @param minimumValue the minimum value + * @param maximumValue the maximum value + * @return the calculated shift (use {@code 1 << shift} to obtain a value) + */ + protected static int calculateShift(int minimumValue, int maximumValue) { + int shift = 0; + int value = 1; + while (value < minimumValue && value < maximumValue) { + value <<= 1; + shift++; + } + return shift; + } + + /** + * Various reference types supported by this map. + */ + public enum ReferenceType { + + /** + * Use {@link SoftReference SoftReferences}. + */ + SOFT, + + /** + * Use {@link WeakReference WeakReferences}. + */ + WEAK + } + + /** + * A single segment used to divide the map to allow better concurrent + * performance. + */ + @SuppressWarnings("serial") + protected final class Segment extends ReentrantLock { + + private final ReferenceManager referenceManager; + + private final int initialSize; + + /** + * Array of references indexed using the low order bits from the hash. This + * property should only be set along with {@code resizeThreshold}. + */ + private volatile Reference[] references; + + /** + * The total number of references contained in this segment. This includes + * chained references and references that have been garbage collected but not + * purged. + */ + private final AtomicInteger count = new AtomicInteger(); + + /** + * The threshold when resizing of the references should occur. When + * {@code count} exceeds this value references will be resized. + */ + private int resizeThreshold; + + public Segment(int initialSize, int resizeThreshold) { + this.referenceManager = createReferenceManager(); + this.initialSize = initialSize; + this.references = createReferenceArray(initialSize); + this.resizeThreshold = resizeThreshold; + } + + @Nullable + public Reference getReference(@Nullable Object key, int hash, Restructure restructure) { + if (restructure == Restructure.WHEN_NECESSARY) { + restructureIfNecessary(false); + } + if (this.count.get() == 0) { + return null; + } + // Use a local copy to protect against other threads writing + Reference[] references = this.references; + int index = getIndex(hash, references); + Reference head = references[index]; + return findInChain(head, key, hash); + } + + /** + * Apply an update operation to this segment. The segment will be locked during + * the update. + * + * @param hash the hash of the key + * @param key the key + * @param task the update operation + * @return the result of the operation + */ + @Nullable + public T doTask(final int hash, @Nullable final Object key, final Task task) { + boolean resize = task.hasOption(TaskOption.RESIZE); + if (task.hasOption(TaskOption.RESTRUCTURE_BEFORE)) { + restructureIfNecessary(resize); + } + if (task.hasOption(TaskOption.SKIP_IF_EMPTY) && this.count.get() == 0) { + return task.execute(null, null, null); + } + lock(); + try { + final int index = getIndex(hash, this.references); + final Reference head = this.references[index]; + Reference ref = findInChain(head, key, hash); + Entry entry = (ref != null ? ref.get() : null); + Entries entries = value -> { + @SuppressWarnings("unchecked") + Entry newEntry = new Entry<>((K) key, value); + Reference newReference = Segment.this.referenceManager.createReference(newEntry, hash, head); + Segment.this.references[index] = newReference; + Segment.this.count.incrementAndGet(); + }; + return task.execute(ref, entry, entries); + } finally { + unlock(); + if (task.hasOption(TaskOption.RESTRUCTURE_AFTER)) { + restructureIfNecessary(resize); + } + } + } + + /** + * Clear all items from this segment. + */ + public void clear() { + if (this.count.get() == 0) { + return; + } + lock(); + try { + this.references = createReferenceArray(this.initialSize); + this.resizeThreshold = (int) (this.references.length * getLoadFactor()); + this.count.set(0); + } finally { + unlock(); + } + } + + /** + * Restructure the underlying data structure when it becomes necessary. This + * method can increase the size of the references table as well as purge any + * references that have been garbage collected. + * + * @param allowResize if resizing is permitted + */ + private void restructureIfNecessary(boolean allowResize) { + int currCount = this.count.get(); + boolean needsResize = allowResize && (currCount > 0 && currCount >= this.resizeThreshold); + Reference ref = this.referenceManager.pollForPurge(); + if (ref != null || (needsResize)) { + restructure(allowResize, ref); + } + } + + private void restructure(boolean allowResize, @Nullable Reference ref) { + boolean needsResize; + lock(); + try { + int countAfterRestructure = this.count.get(); + Set> toPurge = Collections.emptySet(); + if (ref != null) { + toPurge = new HashSet<>(); + while (ref != null) { + toPurge.add(ref); + ref = this.referenceManager.pollForPurge(); + } + } + countAfterRestructure -= toPurge.size(); + + // Recalculate taking into account count inside lock and items that + // will be purged + needsResize = (countAfterRestructure > 0 && countAfterRestructure >= this.resizeThreshold); + boolean resizing = false; + int restructureSize = this.references.length; + if (allowResize && needsResize && restructureSize < MAXIMUM_SEGMENT_SIZE) { + restructureSize <<= 1; + resizing = true; + } + + // Either create a new table or reuse the existing one + Reference[] restructured = (resizing ? createReferenceArray(restructureSize) : this.references); + + // Restructure + for (int i = 0; i < this.references.length; i++) { + ref = this.references[i]; + if (!resizing) { + restructured[i] = null; + } + while (ref != null) { + if (!toPurge.contains(ref)) { + Entry entry = ref.get(); + if (entry != null) { + int index = getIndex(ref.getHash(), restructured); + restructured[index] = this.referenceManager.createReference(entry, ref.getHash(), restructured[index]); + } + } + ref = ref.getNext(); + } + } + + // Replace volatile members + if (resizing) { + this.references = restructured; + this.resizeThreshold = (int) (this.references.length * getLoadFactor()); + } + this.count.set(Math.max(countAfterRestructure, 0)); + } finally { + unlock(); + } + } + + @Nullable + private Reference findInChain(Reference ref, @Nullable Object key, int hash) { + Reference currRef = ref; + while (currRef != null) { + if (currRef.getHash() == hash) { + Entry entry = currRef.get(); + if (entry != null) { + K entryKey = entry.getKey(); + if (ObjectUtils.nullSafeEquals(entryKey, key)) { + return currRef; + } + } + } + currRef = currRef.getNext(); + } + return null; + } + + @SuppressWarnings({ "unchecked" }) + private Reference[] createReferenceArray(int size) { + return new Reference[size]; + } + + private int getIndex(int hash, Reference[] references) { + return (hash & (references.length - 1)); + } + + /** + * Return the size of the current references array. + */ + public int getSize() { + return this.references.length; + } + + /** + * Return the total number of references in this segment. + */ + public int getCount() { + return this.count.get(); + } + } + + /** + * A reference to an {@link Entry} contained in the map. Implementations are + * usually wrappers around specific Java reference implementations (e.g., + * {@link SoftReference}). + * + * @param the key type + * @param the value type + */ + protected interface Reference { + + /** + * Return the referenced entry, or {@code null} if the entry is no longer + * available. + */ + @Nullable + Entry get(); + + /** + * Return the hash for the reference. + */ + int getHash(); + + /** + * Return the next reference in the chain, or {@code null} if none. + */ + @Nullable + Reference getNext(); + + /** + * Release this entry and ensure that it will be returned from + * {@code ReferenceManager#pollForPurge()}. + */ + void release(); + } + + /** + * A single map entry. + * + * @param the key type + * @param the value type + */ + protected static final class Entry implements Map.Entry { + + @Nullable + private final K key; + + @Nullable + private volatile V value; + + public Entry(@Nullable K key, @Nullable V value) { + this.key = key; + this.value = value; + } + + @Override + @Nullable + public K getKey() { + return this.key; + } + + @Override + @Nullable + public V getValue() { + return this.value; + } + + @Override + @Nullable + public V setValue(@Nullable V value) { + V previous = this.value; + this.value = value; + return previous; + } + + @Override + public String toString() { + return (this.key + "=" + this.value); + } + + @Override + @SuppressWarnings("rawtypes") + public boolean equals(@Nullable Object other) { + if (this == other) { + return true; + } + if (!(other instanceof Map.Entry)) { + return false; + } + Map.Entry otherEntry = (Map.Entry) other; + return (ObjectUtils.nullSafeEquals(getKey(), otherEntry.getKey()) + && ObjectUtils.nullSafeEquals(getValue(), otherEntry.getValue())); + } + + @Override + public int hashCode() { + return (ObjectUtils.nullSafeHashCode(this.key) ^ ObjectUtils.nullSafeHashCode(this.value)); + } + } + + /** + * A task that can be {@link Segment#doTask run} against a {@link Segment}. + */ + private abstract class Task { + + private final EnumSet options; + + public Task(TaskOption... options) { + this.options = (options.length == 0 ? EnumSet.noneOf(TaskOption.class) : EnumSet.of(options[0], options)); + } + + public boolean hasOption(TaskOption option) { + return this.options.contains(option); + } + + /** + * Execute the task. + * + * @param ref the found reference (or {@code null}) + * @param entry the found entry (or {@code null}) + * @param entries access to the underlying entries + * @return the result of the task + * @see #execute(Reference, Entry) + */ + @Nullable + protected T execute(@Nullable Reference ref, @Nullable Entry entry, @Nullable Entries entries) { + return execute(ref, entry); + } + + /** + * Convenience method that can be used for tasks that do not need access to + * {@link Entries}. + * + * @param ref the found reference (or {@code null}) + * @param entry the found entry (or {@code null}) + * @return the result of the task + * @see #execute(Reference, Entry, Entries) + */ + @Nullable + protected T execute(@Nullable Reference ref, @Nullable Entry entry) { + return null; + } + } + + /** + * Various options supported by a {@code Task}. + */ + private enum TaskOption { + + RESTRUCTURE_BEFORE, RESTRUCTURE_AFTER, SKIP_IF_EMPTY, RESIZE + } + + /** + * Allows a task access to {@link Segment} entries. + */ + private interface Entries { + + /** + * Add a new entry with the specified value. + * + * @param value the value to add + */ + void add(@Nullable V value); + } + + /** + * Internal entry-set implementation. + */ + private class EntrySet extends AbstractSet> { + + @Override + public Iterator> iterator() { + return new EntryIterator(); + } + + @Override + public boolean contains(@Nullable Object o) { + if (o instanceof Map.Entry) { + Map.Entry entry = (Map.Entry) o; + Reference ref = ConcurrentReferenceHashMap.this.getReference(entry.getKey(), Restructure.NEVER); + Entry otherEntry = (ref != null ? ref.get() : null); + if (otherEntry != null) { + return ObjectUtils.nullSafeEquals(entry.getValue(), otherEntry.getValue()); + } + } + return false; + } + + @Override + public boolean remove(Object o) { + if (o instanceof Map.Entry) { + Map.Entry entry = (Map.Entry) o; + return ConcurrentReferenceHashMap.this.remove(entry.getKey(), entry.getValue()); + } + return false; + } + + @Override + public int size() { + return ConcurrentReferenceHashMap.this.size(); + } + + @Override + public void clear() { + ConcurrentReferenceHashMap.this.clear(); + } + } + + /** + * Internal entry iterator implementation. + */ + private class EntryIterator implements Iterator> { + + private int segmentIndex; + + private int referenceIndex; + + @Nullable + private Reference[] references; + + @Nullable + private Reference reference; + + @Nullable + private Entry next; + + @Nullable + private Entry last; + + public EntryIterator() { + moveToNextSegment(); + } + + @Override + public boolean hasNext() { + getNextIfNecessary(); + return (this.next != null); + } + + @Override + public Entry next() { + getNextIfNecessary(); + if (this.next == null) { + throw new NoSuchElementException(); + } + this.last = this.next; + this.next = null; + return this.last; + } + + private void getNextIfNecessary() { + while (this.next == null) { + moveToNextReference(); + if (this.reference == null) { + return; + } + this.next = this.reference.get(); + } + } + + private void moveToNextReference() { + if (this.reference != null) { + this.reference = this.reference.getNext(); + } + while (this.reference == null && this.references != null) { + if (this.referenceIndex >= this.references.length) { + moveToNextSegment(); + this.referenceIndex = 0; + } else { + this.reference = this.references[this.referenceIndex]; + this.referenceIndex++; + } + } + } + + private void moveToNextSegment() { + this.reference = null; + this.references = null; + if (this.segmentIndex < ConcurrentReferenceHashMap.this.segments.length) { + this.references = ConcurrentReferenceHashMap.this.segments[this.segmentIndex].references; + this.segmentIndex++; + } + } + + @Override + public void remove() { + Assert.state(this.last != null, "No element to remove"); + ConcurrentReferenceHashMap.this.remove(this.last.getKey()); + this.last = null; + } + } + + /** + * The types of restructuring that can be performed. + */ + protected enum Restructure { + + WHEN_NECESSARY, NEVER + } + + /** + * Strategy class used to manage {@link Reference References}. This class can be + * overridden if alternative reference types need to be supported. + */ + protected class ReferenceManager { + + private final ReferenceQueue> queue = new ReferenceQueue<>(); + + /** + * Factory method used to create a new {@link Reference}. + * + * @param entry the entry contained in the reference + * @param hash the hash + * @param next the next reference in the chain, or {@code null} if none + * @return a new {@link Reference} + */ + public Reference createReference(Entry entry, int hash, @Nullable Reference next) { + if (ConcurrentReferenceHashMap.this.referenceType == ReferenceType.WEAK) { + return new WeakEntryReference<>(entry, hash, next, this.queue); + } + return new SoftEntryReference<>(entry, hash, next, this.queue); + } + + /** + * Return any reference that has been garbage collected and can be purged from + * the underlying structure or {@code null} if no references need purging. This + * method must be thread safe and ideally should not block when returning + * {@code null}. References should be returned once and only once. + * + * @return a reference to purge or {@code null} + */ + @SuppressWarnings("unchecked") + @Nullable + public Reference pollForPurge() { + return (Reference) this.queue.poll(); + } + } + + /** + * Internal {@link Reference} implementation for {@link SoftReference + * SoftReferences}. + */ + private static final class SoftEntryReference extends SoftReference> implements Reference { + + private final int hash; + + @Nullable + private final Reference nextReference; + + public SoftEntryReference(Entry entry, int hash, @Nullable Reference next, + ReferenceQueue> queue) { + + super(entry, queue); + this.hash = hash; + this.nextReference = next; + } + + @Override + public int getHash() { + return this.hash; + } + + @Override + @Nullable + public Reference getNext() { + return this.nextReference; + } + + @Override + public void release() { + enqueue(); + clear(); + } + } + + /** + * Internal {@link Reference} implementation for {@link WeakReference + * WeakReferences}. + */ + private static final class WeakEntryReference extends WeakReference> implements Reference { + + private final int hash; + + @Nullable + private final Reference nextReference; + + public WeakEntryReference(Entry entry, int hash, @Nullable Reference next, + ReferenceQueue> queue) { + + super(entry, queue); + this.hash = hash; + this.nextReference = next; + } + + @Override + public int getHash() { + return this.hash; + } + + @Override + @Nullable + public Reference getNext() { + return this.nextReference; + } + + @Override + public void release() { + enqueue(); + clear(); + } + } + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ObjectUtils.java b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ObjectUtils.java new file mode 100644 index 00000000000..a8e0c45180e --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ObjectUtils.java @@ -0,0 +1,320 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; +import org.apache.avro.util.ClassUtils; + +import java.util.Arrays; + +/** + * Miscellaneous object utility methods. + * + *

    + * Mainly for internal use within the framework. + * + *

    + * Thanks to Alex Ruiz for contributing several enhancements to this class! + * + * @author Juergen Hoeller + * @author Keith Donald + * @author Rod Johnson + * @author Rob Harrop + * @author Chris Beams + * @author Sam Brannen + * @see ClassUtils see CollectionUtils see StringUtils + * @since 19.03.2004 + */ +class ObjectUtils { + private ObjectUtils() { + } + + private static final int INITIAL_HASH = 7; + private static final int MULTIPLIER = 31; + + /** + * Determine whether the given array is empty: i.e. {@code null} or of zero + * length. + * + * @param array the array to check + */ + public static boolean isEmpty(@Nullable Object[] array) { + return (array == null || array.length == 0); + } + + // --------------------------------------------------------------------- + // Convenience methods for content-based equality/hash-code handling + // --------------------------------------------------------------------- + + /** + * Determine if the given objects are equal, returning {@code true} if both are + * {@code null} or {@code false} if only one is {@code null}. + *

    + * Compares arrays with {@code Arrays.equals}, performing an equality check + * based on the array elements rather than the array reference. + * + * @param o1 first Object to compare + * @param o2 second Object to compare + * @return whether the given objects are equal + * @see Object#equals(Object) + * @see Arrays#equals + */ + public static boolean nullSafeEquals(@Nullable Object o1, @Nullable Object o2) { + if (o1 == o2) { + return true; + } + if (o1 == null || o2 == null) { + return false; + } + if (o1.equals(o2)) { + return true; + } + if (o1.getClass().isArray() && o2.getClass().isArray()) { + return arrayEquals(o1, o2); + } + return false; + } + + /** + * Compare the given arrays with {@code Arrays.equals}, performing an equality + * check based on the array elements rather than the array reference. + * + * @param o1 first array to compare + * @param o2 second array to compare + * @return whether the given objects are equal + * @see #nullSafeEquals(Object, Object) + * @see Arrays#equals + */ + private static boolean arrayEquals(Object o1, Object o2) { + if (o1 instanceof Object[] && o2 instanceof Object[]) { + return Arrays.equals((Object[]) o1, (Object[]) o2); + } + if (o1 instanceof boolean[] && o2 instanceof boolean[]) { + return Arrays.equals((boolean[]) o1, (boolean[]) o2); + } + if (o1 instanceof byte[] && o2 instanceof byte[]) { + return Arrays.equals((byte[]) o1, (byte[]) o2); + } + if (o1 instanceof char[] && o2 instanceof char[]) { + return Arrays.equals((char[]) o1, (char[]) o2); + } + if (o1 instanceof double[] && o2 instanceof double[]) { + return Arrays.equals((double[]) o1, (double[]) o2); + } + if (o1 instanceof float[] && o2 instanceof float[]) { + return Arrays.equals((float[]) o1, (float[]) o2); + } + if (o1 instanceof int[] && o2 instanceof int[]) { + return Arrays.equals((int[]) o1, (int[]) o2); + } + if (o1 instanceof long[] && o2 instanceof long[]) { + return Arrays.equals((long[]) o1, (long[]) o2); + } + if (o1 instanceof short[] && o2 instanceof short[]) { + return Arrays.equals((short[]) o1, (short[]) o2); + } + return false; + } + + /** + * Return as hash code for the given object; typically the value of + * {@code Object#hashCode()}}. If the object is an array, this method will + * delegate to any of the {@code nullSafeHashCode} methods for arrays in this + * class. If the object is {@code null}, this method returns 0. + * + * @see Object#hashCode() + * @see #nullSafeHashCode(Object[]) + * @see #nullSafeHashCode(boolean[]) + * @see #nullSafeHashCode(byte[]) + * @see #nullSafeHashCode(char[]) + * @see #nullSafeHashCode(double[]) + * @see #nullSafeHashCode(float[]) + * @see #nullSafeHashCode(int[]) + * @see #nullSafeHashCode(long[]) + * @see #nullSafeHashCode(short[]) + */ + public static int nullSafeHashCode(@Nullable Object obj) { + if (obj == null) { + return 0; + } + if (obj.getClass().isArray()) { + if (obj instanceof Object[]) { + return nullSafeHashCode((Object[]) obj); + } + if (obj instanceof boolean[]) { + return nullSafeHashCode((boolean[]) obj); + } + if (obj instanceof byte[]) { + return nullSafeHashCode((byte[]) obj); + } + if (obj instanceof char[]) { + return nullSafeHashCode((char[]) obj); + } + if (obj instanceof double[]) { + return nullSafeHashCode((double[]) obj); + } + if (obj instanceof float[]) { + return nullSafeHashCode((float[]) obj); + } + if (obj instanceof int[]) { + return nullSafeHashCode((int[]) obj); + } + if (obj instanceof long[]) { + return nullSafeHashCode((long[]) obj); + } + if (obj instanceof short[]) { + return nullSafeHashCode((short[]) obj); + } + } + return obj.hashCode(); + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable Object[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (Object element : array) { + hash = MULTIPLIER * hash + nullSafeHashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable boolean[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (boolean element : array) { + hash = MULTIPLIER * hash + Boolean.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable byte[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (byte element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable char[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (char element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable double[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (double element : array) { + hash = MULTIPLIER * hash + Double.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable float[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (float element : array) { + hash = MULTIPLIER * hash + Float.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable int[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (int element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable long[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (long element : array) { + hash = MULTIPLIER * hash + Long.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable short[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (short element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } +} diff --git a/lang/java/avro/src/main/resources/META-INF/services/org.apache.avro.SchemaFormatterFactory b/lang/java/avro/src/main/resources/META-INF/services/org.apache.avro.SchemaFormatterFactory new file mode 100644 index 00000000000..06f140bde45 --- /dev/null +++ b/lang/java/avro/src/main/resources/META-INF/services/org.apache.avro.SchemaFormatterFactory @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.avro.JsonSchemaFormatterFactory +org.apache.avro.CanonicalSchemaFormatterFactory diff --git a/lang/java/avro/src/test/java/org/apache/avro/CustomType.java b/lang/java/avro/src/test/java/org/apache/avro/CustomType.java new file mode 100644 index 00000000000..140ac901b0b --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomType.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +import java.util.Objects; + +public final class CustomType { + private final String name; + + public CustomType(CharSequence name) { + this.name = name.toString(); + } + + public String getName() { + return name; + } + + @Override + public int hashCode() { + return Objects.hashCode(name); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof CustomType && name.equals(((CustomType) obj).name); + } + + @Override + public String toString() { + return "CustomType{name='" + name + "'}"; + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java new file mode 100644 index 00000000000..de8fea02ca4 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +public class CustomTypeConverter extends Conversion { + private static final CustomTypeLogicalTypeFactory logicalTypeFactory = new CustomTypeLogicalTypeFactory(); + + @Override + public Class getConvertedType() { + return CustomType.class; + } + + @Override + public String getLogicalTypeName() { + return logicalTypeFactory.getTypeName(); + } + + @Override + public Schema getRecommendedSchema() { + return Schema.create(Schema.Type.STRING); + } + + @Override + public CustomType fromCharSequence(CharSequence value, Schema schema, LogicalType type) { + return new CustomType(value); + } + + @Override + public CharSequence toCharSequence(CustomType value, Schema schema, LogicalType type) { + return value.getName(); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java new file mode 100644 index 00000000000..3e121e0242c --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +public class CustomTypeLogicalTypeFactory implements LogicalTypes.LogicalTypeFactory { + @Override + public LogicalType fromSchema(Schema schema) { + return new LogicalType(getTypeName()); + } + + @Override + public String getTypeName() { + return "custom"; + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/DummySchemaParser.java b/lang/java/avro/src/test/java/org/apache/avro/DummySchemaParser.java new file mode 100644 index 00000000000..db7dc640521 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/DummySchemaParser.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; + +public class DummySchemaParser implements FormattedSchemaParser { + /** + * Logger for this class. + */ + private static final Logger LOGGER = LoggerFactory.getLogger(DummySchemaParser.class); + public static final String SCHEMA_TEXT_ONE = "one"; + public static final Schema FIXED_SCHEMA = Schema.createFixed("DummyOne", null, "tests", 42); + public static final String SCHEMA_TEXT_ERROR = "error"; + public static final String SCHEMA_TEXT_IO_ERROR = "io-error"; + public static final String ERROR_MESSAGE = "Syntax error"; + public static final String IO_ERROR_MESSAGE = "I/O error"; + + @Override + public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) + throws IOException, SchemaParseException { + LOGGER.debug("Using DummySchemaParser for {}", formattedSchema); + if (SCHEMA_TEXT_ONE.contentEquals(formattedSchema)) { + parseContext.put(FIXED_SCHEMA); + return FIXED_SCHEMA; + } else if (SCHEMA_TEXT_ERROR.contentEquals(formattedSchema)) { + throw new SchemaParseException(ERROR_MESSAGE); + } else if (SCHEMA_TEXT_IO_ERROR.contentEquals(formattedSchema)) { + throw new IOException(IO_ERROR_MESSAGE); + } + // Syntax not recognized + return null; + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/FooBarSpecificRecord.java b/lang/java/avro/src/test/java/org/apache/avro/FooBarSpecificRecord.java index 7c2d5c57339..a5942d8a9e4 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/FooBarSpecificRecord.java +++ b/lang/java/avro/src/test/java/org/apache/avro/FooBarSpecificRecord.java @@ -30,7 +30,7 @@ public class FooBarSpecificRecord extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { private static final long serialVersionUID = 1031933828916876443L; - public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse( + public static final org.apache.avro.Schema SCHEMA$ = org.apache.avro.JsonSchemaParser.parseInternal( "{\"type\":\"record\",\"name\":\"FooBarSpecificRecord\",\"namespace\":\"org.apache.avro\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"name\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}},{\"name\":\"nicknames\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"string\",\"avro.java.string\":\"String\"}}},{\"name\":\"relatedids\",\"type\":{\"type\":\"array\",\"items\":\"int\"}},{\"name\":\"typeEnum\",\"type\":[\"null\",{\"type\":\"enum\",\"name\":\"TypeEnum\",\"symbols\":[\"a\",\"b\",\"c\"]}],\"default\":null}]}"); public static org.apache.avro.Schema getClassSchema() { diff --git a/lang/java/avro/src/test/java/org/apache/avro/GenerateBlockingData.java b/lang/java/avro/src/test/java/org/apache/avro/GenerateBlockingData.java index 0314082d80a..820b564521e 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/GenerateBlockingData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/GenerateBlockingData.java @@ -58,7 +58,7 @@ public static void main(String[] args) throws Exception { System.exit(-1); } - Schema sch = new Schema.Parser().parse(new File(args[0])); + Schema sch = new SchemaParser().parse(new File(args[0])).mainSchema(); File outputFile = new File(args[1]); int numObjects = Integer.parseInt(args[2]); diff --git a/lang/java/avro/src/test/java/org/apache/avro/ParseContextTest.java b/lang/java/avro/src/test/java/org/apache/avro/ParseContextTest.java new file mode 100644 index 00000000000..d40a6cc9d83 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/ParseContextTest.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.apache.avro.util.SchemaResolver; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.EnumSet; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class ParseContextTest { + Schema fooRecord, fooRecordCopy, barEnum, bazFixed, mehRecord; + ParseContext fooBarBaz; + + @BeforeEach + public void setUp() throws Exception { + fooRecord = SchemaBuilder.record("ns.Foo").fields().endRecord(); + fooRecordCopy = SchemaBuilder.record("ns.Foo").fields().endRecord(); + barEnum = SchemaBuilder.enumeration("ns.Bar").symbols(); + bazFixed = SchemaBuilder.fixed("ns.Baz").size(8); + mehRecord = SchemaBuilder.record("ns.Meh").fields().endRecord(); + + fooBarBaz = new ParseContext(); + fooBarBaz.put(fooRecord); + fooBarBaz.put(barEnum); + fooBarBaz.put(bazFixed); + } + + @Test + public void checkNewNameContextContainsPrimitives() { + EnumSet complexTypes = EnumSet.of(Schema.Type.RECORD, Schema.Type.ENUM, Schema.Type.FIXED, + Schema.Type.UNION, Schema.Type.ARRAY, Schema.Type.MAP); + EnumSet primitives = EnumSet.complementOf(complexTypes); + + ParseContext context = new ParseContext(); + for (Schema.Type type : complexTypes) { + assertFalse(context.contains(type.getName())); + } + for (Schema.Type type : primitives) { + assertTrue(context.contains(type.getName())); + } + } + + @Test + public void primitivesAreNotCached() { + EnumSet primitives = EnumSet.complementOf(EnumSet.of(Schema.Type.RECORD, Schema.Type.ENUM, + Schema.Type.FIXED, Schema.Type.UNION, Schema.Type.ARRAY, Schema.Type.MAP)); + + ParseContext context = new ParseContext(); + for (Schema.Type type : primitives) { + Schema first = context.find(type.getName(), null); + Schema second = context.find(type.getName(), null); + assertEquals(first, second); + assertNotSame(first, second); + + first.addProp("logicalType", "brick"); + assertNotEquals(first, second); + } + } + + @Test + public void validateSchemaRetrievalFailure() { + Schema unknown = Schema.createFixed("unknown", null, null, 0); + + Schema unresolved = fooBarBaz.find("unknown", null); + assertTrue(SchemaResolver.isUnresolvedSchema(unresolved)); + assertEquals(unknown.getFullName(), SchemaResolver.getUnresolvedSchemaName(unresolved)); + } + + @Test + public void validateSchemaRetrievalByFullName() { + assertSame(fooRecord, fooBarBaz.find(fooRecord.getFullName(), null)); + } + + @Test + public void validateSchemaRetrievalBySimpleName() { + assertSame(fooRecord, fooBarBaz.find(fooRecord.getName(), fooRecord.getNamespace())); + } + + @Test + public void verifyPutIsIdempotent() { + ParseContext context = new ParseContext(); + assertNotEquals(fooRecord, context.find(fooRecord.getFullName(), null)); + + context.put(fooRecord); + assertEquals(fooRecord, context.find(fooRecord.getFullName(), null)); + + context.put(fooRecord); + assertEquals(fooRecord, context.find(fooRecord.getFullName(), null)); + } + + @Test + public void verifyPutOnlyAcceptsNamedSchemas() { + ParseContext context = new ParseContext(); + assertThrows(AvroRuntimeException.class, () -> context.put(Schema.create(Schema.Type.STRING))); + } + + @Test + public void verifyAddDoesNotAllowChangingSchemas() { + Schema fooEnum = SchemaBuilder.enumeration("ns.Foo").symbols(); + + ParseContext context = new ParseContext(); + context.put(fooRecord); + assertThrows(AvroRuntimeException.class, () -> context.put(fooEnum)); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/SchemaFormatterTest.java b/lang/java/avro/src/test/java/org/apache/avro/SchemaFormatterTest.java new file mode 100644 index 00000000000..00b76e28b94 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/SchemaFormatterTest.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class SchemaFormatterTest { + + @Test + void validateDefaultNaming() { + assertEquals("json", new JsonSchemaFormatterFactory().formatName()); + assertThrows(AvroRuntimeException.class, () -> new Wrongly_Named_SchemaFormatterFactory().formatName()); + assertThrows(AvroRuntimeException.class, () -> new SchemaFormatterFactoryWithOddName().formatName()); + } + + @Test + void validateJsonFormatDefaultsToPrettyPrinting() { + Schema schema = Schema.createFixed("ns.Fixed", null, null, 16); + assertEquals(SchemaFormatter.format("json", schema), SchemaFormatter.format("json/pretty", schema)); + } + + @Test + void validateSupportForPrettyJsonFormat() { + Schema schema = Schema.createFixed("ns.Fixed", null, null, 16); + assertEquals("{\n \"type\" : \"fixed\",\n \"name\" : \"Fixed\",\n \"namespace\" : \"ns\",\n \"size\" : 16\n}", + SchemaFormatter.format("json/pretty", schema)); + } + + @Test + void validateSupportForInlineJsonFormat() { + Schema schema = Schema.createFixed("ns.Fixed", null, null, 16); + assertEquals("{\"type\":\"fixed\",\"name\":\"Fixed\",\"namespace\":\"ns\",\"size\":16}", + SchemaFormatter.format("json/inline", schema)); + } + + @Test + void checkThatJsonHasNoExtraVariant() { + assertThrows(AvroRuntimeException.class, () -> SchemaFormatter.getInstance("json/extra")); + } + + @Test + void validateSupportForCanonicalFormat() { + Schema schema = Schema.createFixed("Fixed", "Another test", "ns", 16); + assertEquals("{\"name\":\"ns.Fixed\",\"type\":\"fixed\",\"size\":16}", SchemaFormatter.format("canonical", schema)); + } + + @Test + void checkThatCanonicalFormHasNoVariants() { + assertThrows(AvroRuntimeException.class, () -> SchemaFormatter.getInstance("canonical/foo")); + } + + @Test + void checkExceptionForMissingFormat() { + assertThrows(AvroRuntimeException.class, () -> SchemaFormatter.getInstance("unknown")); + } + + private static class Wrongly_Named_SchemaFormatterFactory implements SchemaFormatterFactory { + + @Override + public SchemaFormatter getDefaultFormatter() { + return null; + } + } + + private static class SchemaFormatterFactoryWithOddName implements SchemaFormatterFactory { + @Override + public SchemaFormatter getDefaultFormatter() { + return null; + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java new file mode 100644 index 00000000000..871c172875d --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.stream.Stream; + +class SchemaNameValidatorTest { + + @ParameterizedTest + @MethodSource("data") + void validator(NameValidator validator, String input, boolean expectedResult) { + NameValidator.Result result = validator.validate(input); + Assertions.assertEquals(expectedResult, result.isOK(), result.getErrors()); + } + + static Stream data() { + return Stream.of(Arguments.of(NameValidator.UTF_VALIDATOR, null, false), // null not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, null, false), // null not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "", false), // empty not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "", false), // empty not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "Hello world", false), // space not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "Hello world", false), // space not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "H&", false), // non letter or digit not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "H&", false), // non letter or digit not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "H=", false), // non letter or digit not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "H=", false), // non letter or digit not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "H]", false), // non letter or digit not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "H]", false), // non letter or digit not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "Hello_world", true), + Arguments.of(NameValidator.STRICT_VALIDATOR, "Hello_world", true), + Arguments.of(NameValidator.UTF_VALIDATOR, "Êàçô", true), // Accept accent + Arguments.of(NameValidator.STRICT_VALIDATOR, "Êàçô", false), // Not Accept accent + Arguments.of(NameValidator.UTF_VALIDATOR, "5Êàçô", false), // can't start with number + Arguments.of(NameValidator.STRICT_VALIDATOR, "5Êàçô", false), // can't start with number + Arguments.of(NameValidator.UTF_VALIDATOR, "_Hello_world", true), + Arguments.of(NameValidator.STRICT_VALIDATOR, "_Hello_world", true)); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestBigDecimalConversion.java b/lang/java/avro/src/test/java/org/apache/avro/TestBigDecimalConversion.java new file mode 100644 index 00000000000..e781fe07bd9 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestBigDecimalConversion.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.apache.avro; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +public class TestBigDecimalConversion { + + private Conversion conversion = new Conversions.BigDecimalConversion(); + + private final LogicalType bigDecimal = LogicalTypes.bigDecimal(); + + private Schema bytesSchema = conversion.getRecommendedSchema(); + + @ParameterizedTest + @MethodSource("listBigDecimal") + void bigdec(BigDecimal d1) { + ByteBuffer d1bytes = conversion.toBytes(d1, bytesSchema, bigDecimal); + BigDecimal decimal1 = conversion.fromBytes(d1bytes, bytesSchema, bigDecimal); + Assertions.assertEquals(decimal1, d1); + } + + static Stream listBigDecimal() { + Iterator iterator = new Iterator() { + int index = 0; + + BigDecimal step = new BigDecimal(-2.7d); + + BigDecimal current = new BigDecimal(1.0d); + + @Override + public boolean hasNext() { + if (index == 50) { + // test small bigdecimal + current = new BigDecimal(1.0d); + step = new BigDecimal(-0.71d); + } + return index < 100; + } + + @Override + public BigDecimal next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + index++; + current = current.multiply(step); + return current; + } + }; + return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(Arguments::of); + + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestCircularReferences.java b/lang/java/avro/src/test/java/org/apache/avro/TestCircularReferences.java index c3aa5a61063..6777722fdf2 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestCircularReferences.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestCircularReferences.java @@ -18,6 +18,9 @@ package org.apache.avro; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -34,16 +37,14 @@ import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.util.Utf8; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestCircularReferences { - @Rule - public TemporaryFolder temp = new TemporaryFolder(); + @TempDir + public File temp; public static class Reference extends LogicalType { private static final String REFERENCE = "reference"; @@ -152,7 +153,7 @@ public String getTypeName() { } } - @BeforeClass + @BeforeAll public static void addReferenceTypes() { LogicalTypes.register(Referenceable.REFERENCEABLE, new ReferenceableTypeFactory()); LogicalTypes.register(Reference.REFERENCE, new ReferenceTypeFactory()); @@ -303,7 +304,7 @@ public Schema getSchema() { } @Test - public void test() throws IOException { + void test() throws IOException { ReferenceManager manager = new ReferenceManager(); GenericData model = new GenericData(); model.addLogicalTypeConversion(manager.getTracker()); @@ -348,17 +349,17 @@ public void test() throws IOException { Record actual = records.get(0); // because the record is a recursive structure, equals won't work - Assert.assertEquals("Should correctly read back the parent id", 1L, actual.get("id")); - Assert.assertEquals("Should correctly read back the parent data", new Utf8("parent data!"), actual.get("p")); + assertEquals(1L, actual.get("id"), "Should correctly read back the parent id"); + assertEquals(new Utf8("parent data!"), actual.get("p"), "Should correctly read back the parent data"); Record actualChild = (Record) actual.get("child"); - Assert.assertEquals("Should correctly read back the child data", new Utf8("child data!"), actualChild.get("c")); + assertEquals(new Utf8("child data!"), actualChild.get("c"), "Should correctly read back the child data"); Object childParent = actualChild.get("parent"); - Assert.assertTrue("Should have a parent Record object", childParent instanceof Record); + assertTrue(childParent instanceof Record, "Should have a parent Record object"); Record childParentRecord = (Record) actualChild.get("parent"); - Assert.assertEquals("Should have the right parent id", 1L, childParentRecord.get("id")); - Assert.assertEquals("Should have the right parent data", new Utf8("parent data!"), childParentRecord.get("p")); + assertEquals(1L, childParentRecord.get("id"), "Should have the right parent id"); + assertEquals(new Utf8("parent data!"), childParentRecord.get("p"), "Should have the right parent data"); } private List read(GenericData model, Schema schema, File file) throws IOException { @@ -381,7 +382,7 @@ private DatumReader newReader(GenericData model, Schema schema) { @SuppressWarnings("unchecked") private File write(GenericData model, Schema schema, D... data) throws IOException { - File file = temp.newFile(); + File file = File.createTempFile("junit", null, temp); DatumWriter writer = model.createDatumWriter(schema); try (DataFileWriter fileWriter = new DataFileWriter<>(writer)) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestCompare.java b/lang/java/avro/src/test/java/org/apache/avro/TestCompare.java new file mode 100644 index 00000000000..d3cba3573cf --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestCompare.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayOutputStream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.avro.generic.GenericArray; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.io.BinaryData; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.util.Utf8; + +public class TestCompare { + + @Test + void testNull() throws Exception { + Schema schema = SchemaParser.parseSingle("\"null\""); + byte[] b = render(null, schema, new GenericDatumWriter<>()); + assertEquals(0, BinaryData.compare(b, 0, b, 0, schema)); + } + + @Test + void testBoolean() throws Exception { + check("\"boolean\"", Boolean.FALSE, Boolean.TRUE); + } + + @Test + void string() throws Exception { + check("\"string\"", new Utf8(""), new Utf8("a")); + check("\"string\"", new Utf8("a"), new Utf8("b")); + check("\"string\"", new Utf8("a"), new Utf8("ab")); + check("\"string\"", new Utf8("ab"), new Utf8("b")); + } + + @Test + void bytes() throws Exception { + check("\"bytes\"", ByteBuffer.wrap(new byte[] {}), ByteBuffer.wrap(new byte[] { 1 })); + check("\"bytes\"", ByteBuffer.wrap(new byte[] { 1 }), ByteBuffer.wrap(new byte[] { 2 })); + check("\"bytes\"", ByteBuffer.wrap(new byte[] { 1, 2 }), ByteBuffer.wrap(new byte[] { 2 })); + } + + @Test + void testInt() throws Exception { + check("\"int\"", -1, 0); + check("\"int\"", 0, 1); + } + + @Test + void testLong() throws Exception { + check("\"long\"", 11L, 12L); + check("\"long\"", (long) -1, 1L); + } + + @Test + void testFloat() throws Exception { + check("\"float\"", 1.1f, 1.2f); + check("\"float\"", (float) -1.1, 1.0f); + } + + @Test + void testDouble() throws Exception { + check("\"double\"", 1.2, 1.3); + check("\"double\"", -1.2, 1.3); + } + + @Test + void array() throws Exception { + String json = "{\"type\":\"array\", \"items\": \"long\"}"; + Schema schema = SchemaParser.parseSingle(json); + GenericArray a1 = new GenericData.Array<>(1, schema); + a1.add(1L); + GenericArray a2 = new GenericData.Array<>(1, schema); + a2.add(1L); + a2.add(0L); + check(json, a1, a2); + } + + @Test + void record() throws Exception { + String fields = " \"fields\":[" + "{\"name\":\"f\",\"type\":\"int\",\"order\":\"ignore\"}," + + "{\"name\":\"g\",\"type\":\"int\",\"order\":\"descending\"}," + "{\"name\":\"h\",\"type\":\"int\"}]}"; + String recordJson = "{\"type\":\"record\", \"name\":\"Test\"," + fields; + Schema schema = SchemaParser.parseSingle(recordJson); + GenericData.Record r1 = new GenericData.Record(schema); + r1.put("f", 1); + r1.put("g", 13); + r1.put("h", 41); + GenericData.Record r2 = new GenericData.Record(schema); + r2.put("f", 0); + r2.put("g", 12); + r2.put("h", 41); + check(recordJson, r1, r2); + r2.put("f", 0); + r2.put("g", 13); + r2.put("h", 42); + check(recordJson, r1, r2); + + String record2Json = "{\"type\":\"record\", \"name\":\"Test2\"," + fields; + Schema schema2 = SchemaParser.parseSingle(record2Json); + GenericData.Record r3 = new GenericData.Record(schema2); + r3.put("f", 1); + r3.put("g", 13); + r3.put("h", 41); + assert (!r1.equals(r3)); // same fields, diff name + } + + @Test + void testEnum() throws Exception { + String json = "{\"type\":\"enum\", \"name\":\"Test\",\"symbols\": [\"A\", \"B\"]}"; + Schema schema = SchemaParser.parseSingle(json); + check(json, new GenericData.EnumSymbol(schema, "A"), new GenericData.EnumSymbol(schema, "B")); + } + + @Test + void fixed() throws Exception { + String json = "{\"type\": \"fixed\", \"name\":\"Test\", \"size\": 1}"; + Schema schema = SchemaParser.parseSingle(json); + check(json, new GenericData.Fixed(schema, new byte[] { (byte) 'a' }), + new GenericData.Fixed(schema, new byte[] { (byte) 'b' })); + } + + @Test + void union() throws Exception { + check("[\"string\", \"long\"]", new Utf8("a"), new Utf8("b"), false); + check("[\"string\", \"long\"]", 1L, 2L, false); + check("[\"string\", \"long\"]", new Utf8("a"), 1L, false); + } + + private static void check(String schemaJson, T o1, T o2) throws Exception { + check(schemaJson, o1, o2, true); + } + + private static void check(String schemaJson, T o1, T o2, boolean comparable) throws Exception { + check(SchemaParser.parseSingle(schemaJson), o1, o2, comparable, new GenericDatumWriter<>(), GenericData.get()); + } + + private static void check(Schema schema, T o1, T o2, boolean comparable, DatumWriter writer, + GenericData comparator) throws Exception { + + byte[] b1 = render(o1, schema, writer); + byte[] b2 = render(o2, schema, writer); + assertEquals(-1, BinaryData.compare(b1, 0, b2, 0, schema)); + assertEquals(1, BinaryData.compare(b2, 0, b1, 0, schema)); + assertEquals(0, BinaryData.compare(b1, 0, b1, 0, schema)); + assertEquals(0, BinaryData.compare(b2, 0, b2, 0, schema)); + + assertEquals(-1, compare(o1, o2, schema, comparable, comparator)); + assertEquals(1, compare(o2, o1, schema, comparable, comparator)); + assertEquals(0, compare(o1, o1, schema, comparable, comparator)); + assertEquals(0, compare(o2, o2, schema, comparable, comparator)); + + assert (o1.equals(o1)); + assert (o2.equals(o2)); + assert (!o1.equals(o2)); + assert (!o2.equals(o1)); + assert (!o1.equals(new Object())); + assert (!o2.equals(new Object())); + assert (!o1.equals(null)); + assert (!o2.equals(null)); + + assert (o1.hashCode() != o2.hashCode()); + + // check BinaryData.hashCode against Object.hashCode + if (schema.getType() != Schema.Type.ENUM) { + assertEquals(o1.hashCode(), BinaryData.hashCode(b1, 0, b1.length, schema)); + assertEquals(o2.hashCode(), BinaryData.hashCode(b2, 0, b2.length, schema)); + } + + // check BinaryData.hashCode against GenericData.hashCode + assertEquals(comparator.hashCode(o1, schema), BinaryData.hashCode(b1, 0, b1.length, schema)); + assertEquals(comparator.hashCode(o2, schema), BinaryData.hashCode(b2, 0, b2.length, schema)); + + } + + @SuppressWarnings(value = "unchecked") + private static int compare(Object o1, Object o2, Schema schema, boolean comparable, GenericData comparator) { + return comparable ? ((Comparable) o1).compareTo(o2) : comparator.compare(o1, o2, schema); + } + + private static byte[] render(T datum, Schema schema, DatumWriter writer) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.setSchema(schema); + Encoder enc = new EncoderFactory().directBinaryEncoder(out, null); + writer.write(datum, enc); + enc.flush(); + return out.toByteArray(); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java index a5c0dec3efe..ed87a6fd312 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java @@ -17,17 +17,6 @@ */ package org.apache.avro; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; import org.apache.avro.file.CodecFactory; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileStream; @@ -38,33 +27,40 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DatumReader; +import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.RandomData; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@RunWith(Parameterized.class) -public class TestDataFile { - private static final Logger LOG = LoggerFactory.getLogger(TestDataFile.class); +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.function.Function; +import java.util.stream.Stream; - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; - private final CodecFactory codec; +public class TestDataFile { + private static final Logger LOG = LoggerFactory.getLogger(TestDataFile.class); - public TestDataFile(CodecFactory codec) { - this.codec = codec; - LOG.info("Running with codec: " + codec); - } + @TempDir + public File DIR; - @Parameters - public static List codecs() { + public static Stream codecs() { List r = new ArrayList<>(); r.add(new Object[] { null }); r.add(new Object[] { CodecFactory.deflateCodec(0) }); @@ -81,7 +77,7 @@ public static List codecs() { r.add(new Object[] { CodecFactory.zstandardCodec(18, true) }); r.add(new Object[] { CodecFactory.zstandardCodec(0, false, false) }); r.add(new Object[] { CodecFactory.zstandardCodec(0, false, true) }); - return r; + return r.stream().map(Arguments::of); } private static final int COUNT = Integer.parseInt(System.getProperty("test.count", "200")); @@ -90,30 +86,51 @@ public static List codecs() { private static final long SEED = System.currentTimeMillis(); private static final String SCHEMA_JSON = "{\"type\": \"record\", \"name\": \"Test\", \"fields\": [" + "{\"name\":\"stringField\", \"type\":\"string\"}," + "{\"name\":\"longField\", \"type\":\"long\"}]}"; - private static final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON); + private static final Schema SCHEMA = SchemaParser.parseSingle(SCHEMA_JSON); + private static final Object LAST_RECORD; + static { + Object lastValue = null; + for (Object object : new RandomData(SCHEMA, COUNT, SEED)) { + lastValue = object; + } + LAST_RECORD = lastValue; + } - private File makeFile() { - return new File(DIR.getRoot().getPath(), "test-" + codec + ".avro"); + private File makeFile(CodecFactory codec) { + return new File(DIR, "test-" + codec + ".avro"); } - @Test - public void runTestsInOrder() throws Exception { - testGenericWrite(); - testGenericRead(); - testSplits(); - testSyncDiscovery(); - testGenericAppend(); - testReadWithHeader(); - testFSync(false); - testFSync(true); + @ParameterizedTest + @MethodSource("codecs") + public void runTestsInOrder(CodecFactory codec) throws Exception { + // Run for both encoders, but the MethodSource didn't really like it, + // so it is just a loop within the test + List> encoders = new ArrayList<>(); + encoders.add(b -> new EncoderFactory().directBinaryEncoder(b, null)); + encoders.add(b -> new EncoderFactory().blockingDirectBinaryEncoder(b, null)); + + for (Function encoder : encoders) { + LOG.info("Running with codec: {}", codec); + testGenericWrite(codec, encoder); + testGenericRead(codec); + testSplits(codec); + testSyncDiscovery(codec); + testReadLastRecord(codec); + testGenericAppend(codec, encoder); + testReadWithHeader(codec); + testFSync(codec, encoder, false); + testFSync(codec, encoder, true); + } } - private void testGenericWrite() throws IOException { + private void testGenericWrite(CodecFactory codec, Function encoderFunc) + throws IOException { DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>()).setSyncInterval(100); if (codec != null) { writer.setCodec(codec); } - writer.create(SCHEMA, makeFile()); + writer.setEncoder(encoderFunc); + writer.create(SCHEMA, makeFile(codec)); try { int count = 0; for (Object datum : new RandomData(SCHEMA, COUNT, SEED)) { @@ -132,7 +149,7 @@ private void testGenericWrite() throws IOException { } catch (DataFileWriter.AppendWriteException e) { System.out.println("Ignoring: " + e); } - assertTrue("failed to throw when expected", threwProperly); + assertTrue(threwProperly, "failed to throw when expected"); } } } finally { @@ -148,11 +165,11 @@ private void testGenericWrite() throws IOException { doubleCloseEx = e; } - assertNull("Double close() threw an unexpected exception", doubleCloseEx); + assertNull(doubleCloseEx, "Double close() threw an unexpected exception"); } - private void testGenericRead() throws IOException { - try (DataFileReader reader = new DataFileReader<>(makeFile(), new GenericDatumReader<>())) { + private void testGenericRead(CodecFactory codec) throws IOException { + try (DataFileReader reader = new DataFileReader<>(makeFile(codec), new GenericDatumReader<>())) { Object datum = null; if (VALIDATE) { for (Object expected : new RandomData(SCHEMA, COUNT, SEED)) { @@ -167,8 +184,8 @@ private void testGenericRead() throws IOException { } } - private void testSplits() throws IOException { - File file = makeFile(); + private void testSplits(CodecFactory codec) throws IOException { + File file = makeFile(codec); try (DataFileReader reader = new DataFileReader<>(file, new GenericDatumReader<>())) { Random rand = new Random(SEED); int splits = 10; // number of splits @@ -190,8 +207,8 @@ private void testSplits() throws IOException { } } - private void testSyncDiscovery() throws IOException { - File file = makeFile(); + private void testSyncDiscovery(CodecFactory codec) throws IOException { + File file = makeFile(codec); try (DataFileReader reader = new DataFileReader<>(file, new GenericDatumReader<>())) { // discover the sync points ArrayList syncs = new ArrayList<>(); @@ -211,13 +228,46 @@ private void testSyncDiscovery() throws IOException { reader.seek(sync); assertNotNull(reader.next()); } + // Lastly, confirm that reading (but not decoding) all blocks results in the + // same sync points + reader.sync(0); + ArrayList syncs2 = new ArrayList<>(); + while (reader.hasNext()) { + syncs2.add(reader.previousSync()); + reader.nextBlock(); + } + assertEquals(syncs, syncs2); + } + } + + private void testReadLastRecord(CodecFactory codec) throws IOException { + File file = makeFile(codec); + try (DataFileReader reader = new DataFileReader<>(file, new GenericDatumReader<>())) { + long lastBlockStart = -1; + while (reader.hasNext()) { + // This algorithm can be made more efficient by checking if the underlying + // SeekableFileInput has been fully read: if so, the last block is in + // memory, and calls to next() will decode it. + // NOTE: this depends on the current implementation of DataFileReader. + lastBlockStart = reader.previousSync(); + reader.nextBlock(); + } + reader.seek(lastBlockStart); + + Object lastRecord = null; + while (reader.hasNext()) { + lastRecord = reader.next(lastRecord); + } + assertEquals(LAST_RECORD, lastRecord); } } - private void testGenericAppend() throws IOException { - File file = makeFile(); + private void testGenericAppend(CodecFactory codec, Function encoderFunc) + throws IOException { + File file = makeFile(codec); long start = file.length(); try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>()).appendTo(file)) { + writer.setEncoder(encoderFunc); for (Object datum : new RandomData(SCHEMA, COUNT, SEED + 1)) { writer.append(datum); } @@ -238,8 +288,8 @@ private void testGenericAppend() throws IOException { } } - private void testReadWithHeader() throws IOException { - File file = makeFile(); + private void testReadWithHeader(CodecFactory codec) throws IOException { + File file = makeFile(codec); try (DataFileReader reader = new DataFileReader<>(file, new GenericDatumReader<>())) { // get a header for this file DataFileStream.Header header = reader.getHeader(); @@ -249,26 +299,23 @@ private void testReadWithHeader() throws IOException { try (DataFileReader readerTrue = DataFileReader.openReader(sin, new GenericDatumReader<>(), header, true);) { - assertNotNull("Should be able to reopen from arbitrary point", readerTrue.next()); + assertNotNull(readerTrue.next(), "Should be able to reopen from arbitrary point"); long validPos = readerTrue.previousSync(); // post sync, we know of a valid sync point: re-open with seek (sync == false) sin.seek(validPos); try (DataFileReader readerFalse = DataFileReader.openReader(sin, new GenericDatumReader<>(), header, false)) { - assertEquals("Should not move from sync point on reopen", validPos, sin.tell()); - assertNotNull("Should be able to reopen at sync point", readerFalse.next()); + assertEquals(validPos, sin.tell(), "Should not move from sync point on reopen"); + assertNotNull(readerFalse.next(), "Should be able to reopen at sync point"); } - } - } - } @Test - public void testSyncInHeader() throws IOException { - try (DataFileReader reader = new DataFileReader<>(new File("../../../share/test/data/syncInMeta.avro"), - new GenericDatumReader<>())) { + public void syncInHeader() throws IOException { + try (DataFileReader reader = new DataFileReader<>( + new File("target/test-classes/share/test/data/syncInMeta.avro"), new GenericDatumReader<>())) { reader.sync(0); for (Object datum : reader) assertNotNull(datum); @@ -277,11 +324,11 @@ public void testSyncInHeader() throws IOException { @Test public void test12() throws IOException { - readFile(new File("../../../share/test/data/test.avro12"), new GenericDatumReader<>()); + readFile(new File("target/test-classes/share/test/data/test.avro12"), new GenericDatumReader<>()); } @Test - public void testFlushCount() throws IOException { + public void flushCount() throws IOException { DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>()); writer.setFlushOnEveryBlock(false); TestingByteArrayOutputStream out = new TestingByteArrayOutputStream(); @@ -310,12 +357,14 @@ public void testFlushCount() throws IOException { assertTrue(out.flushCount < currentCount && out.flushCount >= flushCounter); } - private void testFSync(boolean useFile) throws IOException { + private void testFSync(CodecFactory codec, Function encoderFunc, boolean useFile) + throws IOException { try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { + writer.setEncoder(encoderFunc); writer.setFlushOnEveryBlock(false); TestingByteArrayOutputStream out = new TestingByteArrayOutputStream(); if (useFile) { - File f = makeFile(); + File f = makeFile(codec); try (SeekableFileInput in = new SeekableFileInput(f)) { writer.appendTo(in, out); } @@ -349,7 +398,7 @@ public static void main(String[] args) throws Exception { File input = new File(args[0]); Schema projection = null; if (args.length > 1) - projection = new Schema.Parser().parse(new File(args[1])); + projection = new SchemaParser().parse(new File(args[1])).mainSchema(); TestDataFile.readFile(input, new GenericDatumReader<>(null, projection)); long start = System.currentTimeMillis(); for (int i = 0; i < 4; i++) diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java index f1267ab9788..2c2f0d8001d 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java @@ -17,60 +17,42 @@ */ package org.apache.avro; -import static org.junit.Assert.assertEquals; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - import org.apache.avro.file.CodecFactory; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.RandomData; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; + +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@RunWith(Parameterized.class) -public class TestDataFileConcat { - private static final Logger LOG = LoggerFactory.getLogger(TestDataFileConcat.class); - - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); +import java.io.File; +import java.io.IOException; +import java.util.stream.Stream; - CodecFactory codec; - CodecFactory codec2; - boolean recompress; +import static org.junit.Assert.assertEquals; - public TestDataFileConcat(CodecFactory codec, CodecFactory codec2, Boolean recompress) { - this.codec = codec; - this.codec2 = codec2; - this.recompress = recompress; - LOG.info("Testing concatenating files, " + codec2 + " into " + codec + " with recompress=" + recompress); - } +public class TestDataFileConcat { + private static final Logger LOG = LoggerFactory.getLogger(TestDataFileConcat.class); - @Parameters - public static List codecs() { - List r = new ArrayList<>(); - r.add(new Object[] { null, null, false }); - r.add(new Object[] { null, null, true }); - r.add(new Object[] { CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), false }); - r.add(new Object[] { CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), true }); - r.add(new Object[] { CodecFactory.deflateCodec(3), CodecFactory.nullCodec(), false }); - r.add(new Object[] { CodecFactory.nullCodec(), CodecFactory.deflateCodec(6), false }); - r.add(new Object[] { CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), false }); - r.add(new Object[] { CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), true }); - r.add(new Object[] { CodecFactory.xzCodec(2), CodecFactory.nullCodec(), false }); - r.add(new Object[] { CodecFactory.nullCodec(), CodecFactory.xzCodec(2), false }); - return r; + @TempDir + public File DIR; + + public static Stream codecs() { + return Stream.of(Arguments.of(null, null, false), Arguments.of(null, null, true), + Arguments.of(CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), false), + Arguments.of(CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), true), + Arguments.of(CodecFactory.deflateCodec(3), CodecFactory.nullCodec(), false), + Arguments.of(CodecFactory.nullCodec(), CodecFactory.deflateCodec(6), false), + Arguments.of(CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), false), + Arguments.of(CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), true), + Arguments.of(CodecFactory.xzCodec(2), CodecFactory.nullCodec(), false), + Arguments.of(CodecFactory.nullCodec(), CodecFactory.xzCodec(2), false)); } private static final int COUNT = Integer.parseInt(System.getProperty("test.count", "200")); @@ -80,14 +62,15 @@ public static List codecs() { private static final String SCHEMA_JSON = "{\"type\": \"record\", \"name\": \"Test\", \"fields\": [" + "{\"name\":\"stringField\", \"type\":\"string\"}" + "," + "{\"name\":\"longField\", \"type\":\"long\"}" + "]}"; - private static final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON); + private static final Schema SCHEMA = SchemaParser.parseSingle(SCHEMA_JSON); private File makeFile(String name) { - return new File(DIR.getRoot().getPath(), "test-" + name + ".avro"); + return new File(DIR, "test-" + name + ".avro"); } - @Test - public void testConcatenateFiles() throws IOException { + @ParameterizedTest + @MethodSource("codecs") + void concatenateFiles(CodecFactory codec, CodecFactory codec2, boolean recompress) throws IOException { System.out.println("SEED = " + SEED); System.out.println("COUNT = " + COUNT); for (int k = 0; k < 5; k++) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCorruption.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCorruption.java index 437ef6cd409..1a8d54f3012 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCorruption.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCorruption.java @@ -17,7 +17,7 @@ */ package org.apache.avro; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.File; @@ -31,7 +31,7 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestDataFileCorruption { @@ -42,7 +42,7 @@ private File makeFile(String name) { } @Test - public void testCorruptedFile() throws IOException { + void corruptedFile() throws IOException { Schema schema = Schema.create(Type.STRING); // Write a data file @@ -87,7 +87,10 @@ public void testCorruptedFile() throws IOException { assertEquals("fig", r.next().toString()); assertFalse(r.hasNext()); } catch (AvroRuntimeException e) { - assertEquals("Invalid sync!", e.getCause().getMessage()); + assertEquals("Invalid sync marker! The sync marker in the data block doesn't match the " + + "file header's sync marker. This likely indicates data corruption, truncated file, " + + "or incorrectly concatenated Avro files. Verify file integrity and ensure proper " + + "file transmission or creation.", e.getCause().getMessage()); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCustomSync.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCustomSync.java index 3ba52376dd5..62d81f63782 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCustomSync.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCustomSync.java @@ -18,8 +18,7 @@ package org.apache.avro; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -32,7 +31,7 @@ import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestDataFileCustomSync { private byte[] createDataFile(byte[] sync) throws IOException { @@ -63,15 +62,17 @@ private static byte[] generateSync() { } } - @Test(expected = IOException.class) - public void testInvalidSync() throws IOException { - // Invalid size (must be 16): - byte[] sync = new byte[8]; - createDataFile(sync); + @Test + void invalidSync() throws IOException { + assertThrows(IOException.class, () -> { + // Invalid size (must be 16): + byte[] sync = new byte[8]; + createDataFile(sync); + }); } @Test - public void testRandomSync() throws IOException { + void randomSync() throws IOException { byte[] sync = generateSync(); byte[] randSyncFile = createDataFile(null); byte[] customSyncFile = createDataFile(sync); @@ -79,10 +80,10 @@ public void testRandomSync() throws IOException { } @Test - public void testCustomSync() throws IOException { + void customSync() throws IOException { byte[] sync = generateSync(); byte[] customSyncFile = createDataFile(sync); byte[] sameCustomSyncFile = createDataFile(sync); - assertTrue(Arrays.equals(customSyncFile, sameCustomSyncFile)); + assertArrayEquals(customSyncFile, sameCustomSyncFile); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileDeflate.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileDeflate.java index 1eb59931ecf..30eaf6f27e6 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileDeflate.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileDeflate.java @@ -17,8 +17,8 @@ */ package org.apache.avro; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -31,12 +31,12 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** Simple test of DataFileWriter and DataFileStream with deflate codec. */ public class TestDataFileDeflate { @Test - public void testWriteAndRead() throws IOException { + void writeAndRead() throws IOException { Schema schema = Schema.create(Type.STRING); // Write it diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileMeta.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileMeta.java index 3a70df4a6ec..30f51153c5b 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileMeta.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileMeta.java @@ -17,8 +17,7 @@ */ package org.apache.avro; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.File; @@ -30,25 +29,24 @@ import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestDataFileMeta { - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); + @TempDir + public File DIR; - @Test(expected = AvroRuntimeException.class) - public void testUseReservedMeta() throws IOException { + @Test + public void useReservedMeta() throws IOException { try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { - w.setMeta("avro.foo", "bar"); + assertThrows(AvroRuntimeException.class, () -> w.setMeta("avro.foo", "bar")); } } - @Test() - public void testUseMeta() throws IOException { - File f = new File(DIR.getRoot().getPath(), "testDataFileMeta.avro"); + @Test + public void useMeta() throws IOException { + File f = new File(DIR, "testDataFileMeta.avro"); try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { w.setMeta("hello", "bar"); w.create(Schema.create(Type.NULL), f); @@ -62,17 +60,17 @@ public void testUseMeta() throws IOException { } - @Test(expected = AvroRuntimeException.class) - public void testUseMetaAfterCreate() throws IOException { + @Test + public void useMetaAfterCreate() throws IOException { try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { w.create(Schema.create(Type.NULL), new ByteArrayOutputStream()); - w.setMeta("foo", "bar"); + assertThrows(AvroRuntimeException.class, () -> w.setMeta("foo", "bar")); } } @Test - public void testBlockSizeSetInvalid() { + public void blockSizeSetInvalid() { int exceptions = 0; for (int i = -1; i < 33; i++) { // 33 invalid, one valid diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java index 8393179cdb0..3919765603b 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java @@ -17,8 +17,7 @@ */ package org.apache.avro; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.*; import java.io.EOFException; import java.io.File; @@ -38,14 +37,17 @@ import org.apache.avro.file.SeekableInput; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; @SuppressWarnings("restriction") public class TestDataFileReader { + @TempDir + public Path dataDir; - @Test // regression test for bug AVRO-2286 - public void testForLeakingFileDescriptors() throws IOException { + @Test + void forLeakingFileDescriptors() throws IOException { StringBuilder sb = new StringBuilder(); int maxTries = 3; for (int tries = 0; tries < maxTries; tries++) { @@ -66,8 +68,7 @@ public void testForLeakingFileDescriptors() throws IOException { return; // Sometimes the number of file descriptors is off due to other processes or - // garbage - // collection. We note each inconsistency and retry. + // garbage collection. We note each inconsistency and retry. sb.append(openFilesBeforeOperation).append("!=").append(openFilesAfterOperation).append(","); } fail("File descriptor leaked from new DataFileReader() over " + maxTries + " tries: (" @@ -82,17 +83,16 @@ private long getNumberOfOpenFileDescriptors() { return 0; } - @Test // regression test for bug AVRO-2944 - public void testThrottledInputStream() throws IOException { + @Test + void throttledInputStream() throws IOException { // AVRO-2944 describes hanging/failure in reading Avro file with performing // magic header check. This happens with throttled input stream, // where we read into buffer less bytes than requested. - Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) - .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " - + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); - File f = Files.createTempFile("testThrottledInputStream", ".avro").toFile(); + Schema legacySchema = JsonSchemaParser.parseInternal("{\"type\": \"record\", \"name\": \"TestSchema\", " + + "\"fields\": [ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); + File f = dataDir.resolve("testThrottledInputStream.avro").toFile(); try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { w.create(legacySchema, f); w.flush(); @@ -141,23 +141,24 @@ public int read(byte[] b, int off, int len) throws IOException { }; } - @Test(expected = EOFException.class) // another regression test for bug AVRO-2944, testing EOF case - public void testInputStreamEOF() throws IOException { - // AVRO-2944 describes hanging/failure in reading Avro file with performing - // magic header check. This potentially happens with a defective input stream - // where a -1 value is unexpectedly returned from a read. - Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) - .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " - + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); - File f = Files.createTempFile("testInputStreamEOF", ".avro").toFile(); - try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { - w.create(legacySchema, f); - w.flush(); - } + @Test + void inputStreamEOF() throws IOException { + assertThrows(EOFException.class, () -> { + // AVRO-2944 describes hanging/failure in reading Avro file with performing + // magic header check. This potentially happens with a defective input stream + // where a -1 value is unexpectedly returned from a read. + Schema legacySchema = JsonSchemaParser.parseInternal("{\"type\": \"record\", \"name\": \"TestSchema\", " + + "\"fields\": [ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); + File f = dataDir.resolve("testInputStreamEOF.avro").toFile(); + try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { + w.create(legacySchema, f); + w.flush(); + } - // Should throw an EOFException - DataFileReader.openReader(eofInputStream(f), new GenericDatumReader<>()); + // Should throw an EOFException + DataFileReader.openReader(eofInputStream(f), new GenericDatumReader<>()); + }); } private SeekableInput eofInputStream(File f) throws IOException { @@ -191,16 +192,16 @@ public int read(byte[] b, int off, int len) throws IOException { } @Test - public void testIgnoreSchemaValidationOnRead() throws IOException { + void ignoreSchemaValidationOnRead() throws IOException { // This schema has an accent in the name and the default for the field doesn't // match the first type in the union. A Java SDK in the past could create a file // containing this schema. - Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) - .parse("{\"type\": \"record\", \"name\": \"InvalidAccÃĢntWithInvalidNull\", \"fields\": " + Schema legacySchema = JsonSchemaParser + .parseInternal("{\"type\": \"record\", \"name\": \"InvalidAccÃĢntWithInvalidNull\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); // Create a file with the legacy schema. - File f = Files.createTempFile("testIgnoreSchemaValidationOnRead", ".avro").toFile(); + File f = dataDir.resolve("testIgnoreSchemaValidationOnRead.avro").toFile(); try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { w.create(legacySchema, f); w.flush(); @@ -212,23 +213,27 @@ public void testIgnoreSchemaValidationOnRead() throws IOException { } } - @Test(expected = InvalidAvroMagicException.class) - public void testInvalidMagicLength() throws IOException { - File f = Files.createTempFile("testInvalidMagicLength", ".avro").toFile(); + @Test + void invalidMagicLength() throws IOException { + File f = dataDir.resolve("testInvalidMagicLength.avro").toFile(); try (FileWriter w = new FileWriter(f)) { w.write("-"); } - - DataFileReader.openReader(new SeekableFileInput(f), new GenericDatumReader<>()); + try (SeekableFileInput fileInput = new SeekableFileInput(f)) { + assertThrows(InvalidAvroMagicException.class, + () -> DataFileReader.openReader(fileInput, new GenericDatumReader<>())); + } } - @Test(expected = InvalidAvroMagicException.class) - public void testInvalidMagicBytes() throws IOException { - File f = Files.createTempFile("testInvalidMagicBytes", ".avro").toFile(); + @Test + void invalidMagicBytes() throws IOException { + File f = dataDir.resolve("testInvalidMagicBytes.avro").toFile(); try (FileWriter w = new FileWriter(f)) { w.write("invalid"); } - - DataFileReader.openReader(new SeekableFileInput(f), new GenericDatumReader<>()); + try (SeekableFileInput fileInput = new SeekableFileInput(f)) { + assertThrows(InvalidAvroMagicException.class, + () -> DataFileReader.openReader(fileInput, new GenericDatumReader<>())); + } } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReflect.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReflect.java index 190f788c397..d6590b7c108 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReflect.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReflect.java @@ -17,6 +17,9 @@ */ package org.apache.avro; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; @@ -37,23 +40,22 @@ import org.apache.avro.reflect.ReflectData; import org.apache.avro.reflect.ReflectDatumReader; import org.apache.avro.reflect.ReflectDatumWriter; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestDataFileReflect { - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); + @TempDir + public File DIR; /* * Test that using multiple schemas in a file works doing a union before writing * any records. */ @Test - public void testMultiReflectWithUnionBeforeWriting() throws IOException { - File file = new File(DIR.getRoot().getPath(), "testMultiReflectWithUnionBeforeWriting.avro"); + void multiReflectWithUnionBeforeWriting() throws IOException { + File file = new File(DIR.getPath(), "testMultiReflectWithUnionBeforeWriting.avro"); CheckList check = new CheckList<>(); try (FileOutputStream fos = new FileOutputStream(file)) { @@ -80,7 +82,7 @@ public void testMultiReflectWithUnionBeforeWriting() throws IOException { for (Object datum : reader) { check.assertEquals(datum, count++); } - Assert.assertEquals(count, check.size()); + assertEquals(count, check.size()); } } @@ -88,8 +90,8 @@ public void testMultiReflectWithUnionBeforeWriting() throws IOException { * Test that writing a record with a field that is null. */ @Test - public void testNull() throws IOException { - File file = new File(DIR.getRoot().getPath(), "testNull.avro"); + void testNull() throws IOException { + File file = new File(DIR.getPath(), "testNull.avro"); CheckList check = new CheckList<>(); try (FileOutputStream fos = new FileOutputStream(file)) { @@ -113,13 +115,13 @@ public void testNull() throws IOException { for (BarRecord datum : reader) { check.assertEquals(datum, count++); } - Assert.assertEquals(count, check.size()); + assertEquals(count, check.size()); } } } @Test - public void testNew() throws IOException { + void testNew() throws IOException { ByteBuffer payload = ByteBuffer.allocateDirect(8 * 1024); for (int i = 0; i < 500; i++) { payload.putInt(1); @@ -142,15 +144,15 @@ public void testNew() throws IOException { BinaryDecoder avroDecoder = DecoderFactory.get().binaryDecoder(inputStream, null); ByteBufferRecord deserialized = datumReader.read(null, avroDecoder); - Assert.assertEquals(bbr, deserialized); + assertEquals(bbr, deserialized); } /* * Test that writing out and reading in a nested class works */ @Test - public void testNestedClass() throws IOException { - File file = new File(DIR.getRoot().getPath(), "testNull.avro"); + void nestedClass() throws IOException { + File file = new File(DIR.getPath(), "testNull.avro"); CheckList check = new CheckList<>(); try (FileOutputStream fos = new FileOutputStream(file)) { @@ -171,7 +173,7 @@ public void testNestedClass() throws IOException { for (BazRecord datum : reader) { check.assertEquals(datum, count++); } - Assert.assertEquals(count, check.size()); + assertEquals(count, check.size()); } } } @@ -188,10 +190,10 @@ T addAndReturn(T check) { } void assertEquals(Object toCheck, int i) { - Assert.assertNotNull(toCheck); + assertNotNull(toCheck); Object o = get(i); - Assert.assertNotNull(o); - Assert.assertEquals(toCheck, o); + assertNotNull(o); + Assertions.assertEquals(toCheck, o); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDecimalConversion.java b/lang/java/avro/src/test/java/org/apache/avro/TestDecimalConversion.java index 2183dd3ac11..391c886c366 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDecimalConversion.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDecimalConversion.java @@ -19,32 +19,26 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericFixed; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import java.math.BigDecimal; import java.nio.ByteBuffer; import static java.math.RoundingMode.HALF_EVEN; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class TestDecimalConversion { private static final Conversion CONVERSION = new Conversions.DecimalConversion(); - @Rule - public ExpectedException expectedException = ExpectedException.none(); - private Schema smallerSchema; private LogicalType smallerLogicalType; private Schema largerSchema; private LogicalType largerLogicalType; - @Before + @BeforeEach public void setup() { smallerSchema = Schema.createFixed("smallFixed", null, null, 3); smallerSchema.addProp("logicalType", "decimal"); @@ -60,7 +54,7 @@ public void setup() { } @Test - public void testToFromBytes() { + void toFromBytes() { final BigDecimal value = BigDecimal.valueOf(10.99).setScale(15, HALF_EVEN); final ByteBuffer byteBuffer = CONVERSION.toBytes(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromBytes(byteBuffer, largerSchema, largerLogicalType); @@ -68,7 +62,7 @@ public void testToFromBytes() { } @Test - public void testToFromBytesMaxPrecision() { + void toFromBytesMaxPrecision() { final BigDecimal value = new BigDecimal("4567335489766.99834").setScale(15, HALF_EVEN); final ByteBuffer byteBuffer = CONVERSION.toBytes(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromBytes(byteBuffer, largerSchema, largerLogicalType); @@ -76,15 +70,15 @@ public void testToFromBytesMaxPrecision() { } @Test - public void testToBytesPrecisionError() { + void toBytesPrecisionError() { final BigDecimal value = new BigDecimal("1.07046455859736525E+18").setScale(15, HALF_EVEN); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Cannot encode decimal with precision 34 as max precision 28"); - CONVERSION.toBytes(value, largerSchema, largerLogicalType); + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toBytes(value, largerSchema, largerLogicalType)); + assertEquals("Cannot encode decimal with precision 34 as max precision 28", avroTypeException.getMessage()); } @Test - public void testToBytesFixedSmallerScale() { + void toBytesFixedSmallerScale() { final BigDecimal value = new BigDecimal("99892.1234").setScale(10, HALF_EVEN); final ByteBuffer byteBuffer = CONVERSION.toBytes(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromBytes(byteBuffer, largerSchema, largerLogicalType); @@ -92,15 +86,15 @@ public void testToBytesFixedSmallerScale() { } @Test - public void testToBytesScaleError() { + void toBytesScaleError() { final BigDecimal value = new BigDecimal("4567335489766.989989998435899453").setScale(16, HALF_EVEN); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Cannot encode decimal with scale 16 as scale 15 without rounding"); - CONVERSION.toBytes(value, largerSchema, largerLogicalType); + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toBytes(value, largerSchema, largerLogicalType)); + assertEquals("Cannot encode decimal with scale 16 as scale 15 without rounding", avroTypeException.getMessage()); } @Test - public void testToFromFixed() { + void toFromFixed() { final BigDecimal value = new BigDecimal("3").setScale(15, HALF_EVEN); final GenericFixed fixed = CONVERSION.toFixed(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromFixed(fixed, largerSchema, largerLogicalType); @@ -108,7 +102,7 @@ public void testToFromFixed() { } @Test - public void testToFromFixedMaxPrecision() { + void toFromFixedMaxPrecision() { final BigDecimal value = new BigDecimal("4567335489766.99834").setScale(15, HALF_EVEN); final GenericFixed fixed = CONVERSION.toFixed(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromFixed(fixed, largerSchema, largerLogicalType); @@ -116,15 +110,16 @@ public void testToFromFixedMaxPrecision() { } @Test - public void testToFixedPrecisionError() { + void toFixedPrecisionError() { final BigDecimal value = new BigDecimal("1.07046455859736525E+18").setScale(15, HALF_EVEN); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Cannot encode decimal with precision 34 as max precision 28"); - CONVERSION.toFixed(value, largerSchema, largerLogicalType); + + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toFixed(value, largerSchema, largerLogicalType)); + assertEquals("Cannot encode decimal with precision 34 as max precision 28", avroTypeException.getMessage()); } @Test - public void testToFromFixedSmallerScale() { + void toFromFixedSmallerScale() { final BigDecimal value = new BigDecimal("99892.1234").setScale(10, HALF_EVEN); final GenericFixed fixed = CONVERSION.toFixed(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromFixed(fixed, largerSchema, largerLogicalType); @@ -132,15 +127,16 @@ public void testToFromFixedSmallerScale() { } @Test - public void testToFixedScaleError() { + void toFixedScaleError() { final BigDecimal value = new BigDecimal("4567335489766.3453453453453453453453").setScale(16, HALF_EVEN); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Cannot encode decimal with scale 16 as scale 15 without rounding"); - CONVERSION.toFixed(value, largerSchema, largerLogicalType); + + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toFixed(value, largerSchema, largerLogicalType)); + assertEquals("Cannot encode decimal with scale 16 as scale 15 without rounding", avroTypeException.getMessage()); } @Test - public void testToFromFixedMatchScaleAndPrecision() { + void toFromFixedMatchScaleAndPrecision() { final BigDecimal value = new BigDecimal("123.45"); final GenericFixed fixed = CONVERSION.toFixed(value, smallerSchema, smallerLogicalType); final BigDecimal result = CONVERSION.fromFixed(fixed, smallerSchema, smallerLogicalType); @@ -148,7 +144,7 @@ public void testToFromFixedMatchScaleAndPrecision() { } @Test - public void testToFromFixedRepresentedInLogicalTypeAllowRoundUnneccesary() { + void toFromFixedRepresentedInLogicalTypeAllowRoundUnneccesary() { final BigDecimal value = new BigDecimal("123.4500"); final GenericFixed fixed = CONVERSION.toFixed(value, smallerSchema, smallerLogicalType); final BigDecimal result = CONVERSION.fromFixed(fixed, smallerSchema, smallerLogicalType); @@ -156,24 +152,27 @@ public void testToFromFixedRepresentedInLogicalTypeAllowRoundUnneccesary() { } @Test - public void testToFromFixedPrecisionErrorAfterAdjustingScale() { + void toFromFixedPrecisionErrorAfterAdjustingScale() { final BigDecimal value = new BigDecimal("1234.560"); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage( - "Cannot encode decimal with precision 6 as max precision 5. This is after safely adjusting scale from 3 to required 2"); - CONVERSION.toFixed(value, smallerSchema, smallerLogicalType); + + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toFixed(value, smallerSchema, smallerLogicalType)); + assertEquals( + "Cannot encode decimal with precision 6 as max precision 5. This is after safely adjusting scale from 3 to required 2", + avroTypeException.getMessage()); } @Test - public void testToFixedRepresentedInLogicalTypeErrorIfRoundingRequired() { + void toFixedRepresentedInLogicalTypeErrorIfRoundingRequired() { final BigDecimal value = new BigDecimal("123.456"); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Cannot encode decimal with scale 3 as scale 2 without rounding"); - CONVERSION.toFixed(value, smallerSchema, smallerLogicalType); + + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toFixed(value, smallerSchema, smallerLogicalType)); + assertEquals("Cannot encode decimal with scale 3 as scale 2 without rounding", avroTypeException.getMessage()); } @Test - public void testImportanceOfEnsuringCorrectScaleWhenConvertingFixed() { + void importanceOfEnsuringCorrectScaleWhenConvertingFixed() { LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) smallerLogicalType; final BigDecimal bigDecimal = new BigDecimal("1234.5"); @@ -192,7 +191,7 @@ public void testImportanceOfEnsuringCorrectScaleWhenConvertingFixed() { } @Test - public void testImportanceOfEnsuringCorrectScaleWhenConvertingBytes() { + void importanceOfEnsuringCorrectScaleWhenConvertingBytes() { LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) smallerLogicalType; final BigDecimal bigDecimal = new BigDecimal("1234.5"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java b/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java index a9f78f16899..f35c62d7a2e 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java @@ -18,19 +18,32 @@ package org.apache.avro; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; public class TestFixed { @Test - public void testFixedDefaultValueDrop() { + void fixedDefaultValueDrop() { Schema md5 = SchemaBuilder.builder().fixed("MD5").size(16); Schema frec = SchemaBuilder.builder().record("test").fields().name("hash").type(md5).withDefault(new byte[16]) .endRecord(); Schema.Field field = frec.getField("hash"); - Assert.assertNotNull(field.defaultVal()); - Assert.assertArrayEquals(new byte[16], (byte[]) field.defaultVal()); + assertNotNull(field.defaultVal()); + assertArrayEquals(new byte[16], (byte[]) field.defaultVal()); + } + + @Test + void fixedLengthOutOfLimit() { + Exception ex = assertThrows(UnsupportedOperationException.class, + () -> Schema.createFixed("oversize", "doc", "space", Integer.MAX_VALUE)); + assertEquals(TestSystemLimitException.ERROR_VM_LIMIT_BYTES, ex.getMessage()); } + @Test + void fixedNegativeLength() { + Exception ex = assertThrows(AvroRuntimeException.class, () -> Schema.createFixed("negative", "doc", "space", -1)); + assertEquals(TestSystemLimitException.ERROR_NEGATIVE, ex.getMessage()); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java b/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java index 7b1f5bf5249..c6749256e09 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java @@ -18,42 +18,50 @@ package org.apache.avro; +import org.hamcrest.collection.IsMapContaining; +import org.junit.jupiter.api.Test; + import java.util.Arrays; import java.util.concurrent.Callable; -import org.hamcrest.MatcherAssert; -import org.hamcrest.collection.IsMapContaining; -import org.junit.Assert; -import org.junit.Test; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; public class TestLogicalType { @Test - public void testDecimalFromSchema() { + void decimalFromSchema() { Schema schema = Schema.createFixed("aFixed", null, null, 4); schema.addProp("logicalType", "decimal"); schema.addProp("precision", 9); schema.addProp("scale", 2); LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(schema); - Assert.assertTrue("Should be a Decimal", logicalType instanceof LogicalTypes.Decimal); + assertTrue(logicalType instanceof LogicalTypes.Decimal, "Should be a Decimal"); LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; - Assert.assertEquals("Should have correct precision", 9, decimal.getPrecision()); - Assert.assertEquals("Should have correct scale", 2, decimal.getScale()); + assertEquals(9, decimal.getPrecision(), "Should have correct precision"); + assertEquals(2, decimal.getScale(), "Should have correct scale"); } @Test - public void testInvalidLogicalTypeIgnored() { + void invalidLogicalTypeIgnored() { final Schema schema = Schema.createFixed("aFixed", null, null, 2); schema.addProp("logicalType", "decimal"); schema.addProp("precision", 9); schema.addProp("scale", 2); - Assert.assertNull("Should ignore invalid logical type", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Should ignore invalid logical type"); } @Test - public void testDecimalWithNonByteArrayTypes() { + void decimalWithNonByteArrayTypes() { final LogicalType decimal = LogicalTypes.decimal(5, 2); // test simple types Schema[] nonBytes = new Schema[] { Schema.createRecord("Record", null, null, false), @@ -73,25 +81,26 @@ public void testDecimalWithNonByteArrayTypes() { } @Test - public void testUnknownFromJsonNode() { + void unknownFromJsonNode() { Schema schema = Schema.create(Schema.Type.STRING); schema.addProp("logicalType", "unknown"); schema.addProp("someProperty", 34); LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(schema); - Assert.assertNull("Should not return a LogicalType instance", logicalType); + assertNull(logicalType, "Should not return a LogicalType instance"); } @Test - public void testDecimalBytesHasNoPrecisionLimit() { + void decimalBytesHasNoPrecisionLimit() { Schema schema = Schema.create(Schema.Type.BYTES); // precision is not limited for bytes LogicalTypes.decimal(Integer.MAX_VALUE).addToSchema(schema); - Assert.assertEquals("Precision should be an Integer.MAX_VALUE", Integer.MAX_VALUE, - ((LogicalTypes.Decimal) LogicalTypes.fromSchemaIgnoreInvalid(schema)).getPrecision()); + assertEquals(Integer.MAX_VALUE, + ((LogicalTypes.Decimal) LogicalTypes.fromSchemaIgnoreInvalid(schema)).getPrecision(), + "Precision should be an Integer.MAX_VALUE"); } @Test - public void testDecimalFixedPrecisionLimit() { + void decimalFixedPrecisionLimit() { // 4 bytes can hold up to 9 digits of precision final Schema schema = Schema.createFixed("aDecimal", null, null, 4); assertThrows("Should reject precision", IllegalArgumentException.class, "fixed(4) cannot store 10 digits (max 9)", @@ -99,7 +108,7 @@ public void testDecimalFixedPrecisionLimit() { LogicalTypes.decimal(10).addToSchema(schema); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Invalid logical type should not be set on schema"); // 129 bytes can hold up to 310 digits of precision final Schema schema129 = Schema.createFixed("aDecimal", null, null, 129); @@ -108,56 +117,55 @@ public void testDecimalFixedPrecisionLimit() { LogicalTypes.decimal(311).addToSchema(schema129); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", - LogicalTypes.fromSchemaIgnoreInvalid(schema129)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema129), "Invalid logical type should not be set on schema"); } @Test - public void testDecimalFailsWithZeroPrecision() { + void decimalFailsWithZeroPrecision() { final Schema schema = Schema.createFixed("aDecimal", null, null, 4); assertThrows("Should reject precision", IllegalArgumentException.class, "Invalid decimal precision: 0 (must be positive)", () -> { LogicalTypes.decimal(0).addToSchema(schema); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Invalid logical type should not be set on schema"); } @Test - public void testDecimalFailsWithNegativePrecision() { + void decimalFailsWithNegativePrecision() { final Schema schema = Schema.createFixed("aDecimal", null, null, 4); assertThrows("Should reject precision", IllegalArgumentException.class, "Invalid decimal precision: -9 (must be positive)", () -> { LogicalTypes.decimal(-9).addToSchema(schema); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Invalid logical type should not be set on schema"); } @Test - public void testDecimalScaleBoundedByPrecision() { + void decimalScaleBoundedByPrecision() { final Schema schema = Schema.createFixed("aDecimal", null, null, 4); assertThrows("Should reject precision", IllegalArgumentException.class, "Invalid decimal scale: 10 (greater than precision: 9)", () -> { LogicalTypes.decimal(9, 10).addToSchema(schema); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Invalid logical type should not be set on schema"); } @Test - public void testDecimalFailsWithNegativeScale() { + void decimalFailsWithNegativeScale() { final Schema schema = Schema.createFixed("aDecimal", null, null, 4); assertThrows("Should reject precision", IllegalArgumentException.class, "Invalid decimal scale: -2 (must be positive)", () -> { LogicalTypes.decimal(9, -2).addToSchema(schema); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Invalid logical type should not be set on schema"); } @Test - public void testSchemaRejectsSecondLogicalType() { + void schemaRejectsSecondLogicalType() { final Schema schema = Schema.createFixed("aDecimal", null, null, 4); LogicalTypes.decimal(9).addToSchema(schema); assertThrows("Should reject second logical type", AvroRuntimeException.class, "Can't overwrite property: scale", @@ -165,37 +173,61 @@ public void testSchemaRejectsSecondLogicalType() { LogicalTypes.decimal(9, 2).addToSchema(schema); return null; }); - Assert.assertEquals("First logical type should still be set on schema", LogicalTypes.decimal(9), - LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertEquals(LogicalTypes.decimal(9), LogicalTypes.fromSchemaIgnoreInvalid(schema), + "First logical type should still be set on schema"); } @Test - public void testDecimalDefaultScale() { + void decimalDefaultScale() { Schema schema = Schema.createFixed("aDecimal", null, null, 4); // 4 bytes can hold up to 9 digits of precision LogicalTypes.decimal(9).addToSchema(schema); - Assert.assertEquals("Scale should be a 0", 0, - ((LogicalTypes.Decimal) LogicalTypes.fromSchemaIgnoreInvalid(schema)).getScale()); + assertEquals(0, ((LogicalTypes.Decimal) LogicalTypes.fromSchemaIgnoreInvalid(schema)).getScale(), + "Scale should be a 0"); } @Test - public void testFixedDecimalToFromJson() { + void fixedDecimalToFromJson() { Schema schema = Schema.createFixed("aDecimal", null, null, 4); LogicalTypes.decimal(9, 2).addToSchema(schema); - Schema parsed = new Schema.Parser().parse(schema.toString(true)); - Assert.assertEquals("Constructed and parsed schemas should match", schema, parsed); + Schema parsed = SchemaParser.parseSingle(schema.toString(true)); + assertEquals(schema, parsed, "Constructed and parsed schemas should match"); } @Test - public void testBytesDecimalToFromJson() { + void bytesDecimalToFromJson() { Schema schema = Schema.create(Schema.Type.BYTES); LogicalTypes.decimal(9, 2).addToSchema(schema); - Schema parsed = new Schema.Parser().parse(schema.toString(true)); - Assert.assertEquals("Constructed and parsed schemas should match", schema, parsed); + Schema parsed = SchemaParser.parseSingle(schema.toString(true)); + assertEquals(schema, parsed, "Constructed and parsed schemas should match"); + } + + @Test + void uuidExtendsString() { + Schema uuidSchema = LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING)); + assertEquals(LogicalTypes.uuid(), uuidSchema.getLogicalType()); + + assertThrows("UUID requires a string", IllegalArgumentException.class, + "Uuid can only be used with an underlying string or fixed type", + () -> LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.INT))); } @Test - public void testLogicalTypeEquals() { + void durationExtendsFixed12() { + Schema durationSchema = LogicalTypes.duration().addToSchema(Schema.createFixed("f", null, null, 12)); + assertEquals(LogicalTypes.duration(), LogicalTypes.fromSchema(durationSchema)); + + assertThrows("Duration requires a fixed(12)", IllegalArgumentException.class, + "Duration can only be used with an underlying fixed type of size 12.", + () -> LogicalTypes.duration().addToSchema(Schema.create(Schema.Type.INT))); + + assertThrows("Duration requires a fixed(12)", IllegalArgumentException.class, + "Duration can only be used with an underlying fixed type of size 12.", + () -> LogicalTypes.duration().addToSchema(Schema.createFixed("wrong", null, null, 42))); + } + + @Test + void logicalTypeEquals() { LogicalTypes.Decimal decimal90 = LogicalTypes.decimal(9); LogicalTypes.Decimal decimal80 = LogicalTypes.decimal(8); LogicalTypes.Decimal decimal92 = LogicalTypes.decimal(9, 2); @@ -209,12 +241,12 @@ public void testLogicalTypeEquals() { } @Test - public void testLogicalTypeInSchemaEquals() { + void logicalTypeInSchemaEquals() { Schema schema1 = Schema.createFixed("aDecimal", null, null, 4); Schema schema2 = Schema.createFixed("aDecimal", null, null, 4); Schema schema3 = Schema.createFixed("aDecimal", null, null, 4); - Assert.assertNotSame(schema1, schema2); - Assert.assertNotSame(schema1, schema3); + assertNotSame(schema1, schema2); + assertNotSame(schema1, schema3); assertEqualsTrue("No logical types", schema1, schema2); assertEqualsTrue("No logical types", schema1, schema3); @@ -229,7 +261,7 @@ public void testLogicalTypeInSchemaEquals() { } @Test - public void testRegisterLogicalTypeThrowsIfTypeNameNotProvided() { + void registerLogicalTypeThrowsIfTypeNameNotProvided() { assertThrows("Should error if type name was not provided", UnsupportedOperationException.class, "LogicalTypeFactory TypeName has not been provided", () -> { LogicalTypes.register(schema -> LogicalTypes.date()); @@ -238,7 +270,7 @@ public void testRegisterLogicalTypeThrowsIfTypeNameNotProvided() { } @Test - public void testRegisterLogicalTypeWithName() { + void registerLogicalTypeWithName() { final LogicalTypes.LogicalTypeFactory factory = new LogicalTypes.LogicalTypeFactory() { @Override public LogicalType fromSchema(Schema schema) { @@ -253,11 +285,11 @@ public String getTypeName() { LogicalTypes.register("registered", factory); - MatcherAssert.assertThat(LogicalTypes.getCustomRegisteredTypes(), IsMapContaining.hasEntry("registered", factory)); + assertThat(LogicalTypes.getCustomRegisteredTypes(), IsMapContaining.hasEntry("registered", factory)); } @Test - public void testRegisterLogicalTypeWithFactoryName() { + void registerLogicalTypeWithFactoryName() { final LogicalTypes.LogicalTypeFactory factory = new LogicalTypes.LogicalTypeFactory() { @Override public LogicalType fromSchema(Schema schema) { @@ -272,27 +304,32 @@ public String getTypeName() { LogicalTypes.register(factory); - MatcherAssert.assertThat(LogicalTypes.getCustomRegisteredTypes(), IsMapContaining.hasEntry("factory", factory)); + assertThat(LogicalTypes.getCustomRegisteredTypes(), IsMapContaining.hasEntry("factory", factory)); } @Test - public void testRegisterLogicalTypeWithFactoryNameNotProvided() { + void registerLogicalTypeWithFactoryNameNotProvided() { final LogicalTypes.LogicalTypeFactory factory = schema -> LogicalTypes.date(); LogicalTypes.register("logicalTypeName", factory); - MatcherAssert.assertThat(LogicalTypes.getCustomRegisteredTypes(), - IsMapContaining.hasEntry("logicalTypeName", factory)); + assertThat(LogicalTypes.getCustomRegisteredTypes(), IsMapContaining.hasEntry("logicalTypeName", factory)); + } + + @Test + public void testRegisterLogicalTypeFactoryByServiceLoader() { + assertThat(LogicalTypes.getCustomRegisteredTypes(), + IsMapContaining.hasEntry(equalTo("custom"), instanceOf(LogicalTypes.LogicalTypeFactory.class))); } public static void assertEqualsTrue(String message, Object o1, Object o2) { - Assert.assertTrue("Should be equal (forward): " + message, o1.equals(o2)); - Assert.assertTrue("Should be equal (reverse): " + message, o2.equals(o1)); + assertEquals(o1, o2, "Should be equal (forward): " + message); + assertEquals(o2, o1, "Should be equal (reverse): " + message); } public static void assertEqualsFalse(String message, Object o1, Object o2) { - Assert.assertFalse("Should be equal (forward): " + message, o1.equals(o2)); - Assert.assertFalse("Should be equal (reverse): " + message, o2.equals(o1)); + assertNotEquals(o1, o2, "Should be equal (forward): " + message); + assertNotEquals(o2, o1, "Should be equal (reverse): " + message); } /** @@ -305,14 +342,14 @@ public static void assertEqualsFalse(String message, Object o1, Object o2) { * @param callable A Callable that is expected to throw the exception */ public static void assertThrows(String message, Class expected, String containedInMessage, - Callable callable) { + Callable callable) { try { callable.call(); - Assert.fail("No exception was thrown (" + message + "), expected: " + expected.getName()); + fail("No exception was thrown (" + message + "), expected: " + expected.getName()); } catch (Exception actual) { - Assert.assertEquals(message, expected, actual.getClass()); - Assert.assertTrue("Expected exception message (" + containedInMessage + ") missing: " + actual.getMessage(), - actual.getMessage().contains(containedInMessage)); + assertEquals(expected, actual.getClass(), message); + assertTrue(actual.getMessage().contains(containedInMessage), + "Expected exception message (" + containedInMessage + ") missing: " + actual.getMessage()); } } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java b/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java index 0e9d08e95fb..e697830c34f 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java @@ -22,14 +22,13 @@ import org.apache.avro.io.DatumReader; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.JsonDecoder; -import org.junit.Test; - +import org.junit.jupiter.api.Test; import java.io.ByteArrayInputStream; import java.io.IOException; import static java.nio.charset.StandardCharsets.UTF_8; import static org.hamcrest.CoreMatchers.equalTo; -import static org.junit.Assert.assertThat; +import static org.hamcrest.MatcherAssert.assertThat; /** * This test demonstrates the fix for a complex nested schema type. @@ -37,7 +36,7 @@ public class TestNestedRecords { @Test - public void testSingleSubRecord() throws IOException { + void singleSubRecord() throws IOException { final Schema child = SchemaBuilder.record("Child").namespace("org.apache.avro.nested").fields() .requiredString("childField").endRecord(); @@ -64,7 +63,7 @@ public void testSingleSubRecord() throws IOException { } @Test - public void testSingleSubRecordExtraField() throws IOException { + void singleSubRecordExtraField() throws IOException { final Schema child = SchemaBuilder.record("Child").namespace("org.apache.avro.nested").fields() .requiredString("childField").endRecord(); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java b/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java index 3c63cca9b18..cc59a6cd996 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java @@ -17,23 +17,115 @@ */ package org.apache.avro; -import static org.junit.Assert.*; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.io.JsonEncoder; -import org.junit.Test; +import com.fasterxml.jackson.databind.JsonNode; + +import static java.util.Collections.emptyList; +import static java.util.Collections.emptyMap; +import static java.util.Collections.singletonList; +import static java.util.Collections.singletonMap; +import static org.junit.jupiter.api.Assertions.*; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.util.Collections; + +import org.junit.jupiter.api.Test; public class TestProtocol { @Test - public void testPropEquals() { + public void parse() throws IOException { + File fic = new File("target/test-classes/share/test/schemas/namespace.avpr"); + Protocol protocol = Protocol.parse(fic); + assertNotNull(protocol); + assertEquals("TestNamespace", protocol.getName()); + } + + /** + * record type 'User' contains a field of type 'Status', which contains a field + * of type 'User'. + */ + @Test + public void crossProtocol() { + String userStatus = "{ \"protocol\" : \"p1\", " + "\"types\": [" + + "{\"name\": \"User\", \"type\": \"record\", \"fields\": [{\"name\": \"current_status\", \"type\": \"Status\"}]},\n" + + "\n" + + "{\"name\": \"Status\", \"type\": \"record\", \"fields\": [{\"name\": \"author\", \"type\": \"User\"}]}" + + "]}"; + + Protocol protocol = Protocol.parse(userStatus); + Schema userSchema = protocol.getType("User"); + Schema statusSchema = protocol.getType("Status"); + assertSame(statusSchema, userSchema.getField("current_status").schema()); + assertSame(userSchema, statusSchema.getField("author").schema()); + + String parsingFormUser = SchemaNormalization.toParsingForm(userSchema); + assertEquals( + "{\"name\":\"User\",\"type\":\"record\",\"fields\":[{\"name\":\"current_status\",\"type\":{\"name\":\"Status\",\"type\":\"record\",\"fields\":[{\"name\":\"author\",\"type\":\"User\"}]}}]}", + parsingFormUser); + + String parsingFormStatus = SchemaNormalization.toParsingForm(statusSchema); + assertEquals( + "{\"name\":\"Status\",\"type\":\"record\",\"fields\":[{\"name\":\"author\",\"type\":{\"name\":\"User\",\"type\":\"record\",\"fields\":[{\"name\":\"current_status\",\"type\":\"Status\"}]}}]}", + parsingFormStatus); + } + + /** + * When one schema with a type used before it is defined, test normalization + * defined schema before it is used. + */ + @Test + void normalization() { + final String schema = "{\n" + " \"type\":\"record\", \"name\": \"Main\", " + " \"fields\":[\n" + + " { \"name\":\"f1\", \"type\":\"Sub\" },\n" // use Sub + + " { \"name\":\"f2\", " + " \"type\":{\n" + " \"type\":\"enum\", \"name\":\"Sub\",\n" // define + // Sub + + " \"symbols\":[\"OPEN\",\"CLOSE\"]\n" + " }\n" + " }\n" + " ]\n" + "}"; + Schema s = SchemaParser.parseSingle(schema); + assertNotNull(s); + + String parsingForm = SchemaNormalization.toParsingForm(s); + assertEquals( + "{\"name\":\"Main\",\"type\":\"record\",\"fields\":[{\"name\":\"f1\",\"type\":{\"name\":\"Sub\",\"type\":\"enum\",\"symbols\":[\"OPEN\",\"CLOSE\"]}},{\"name\":\"f2\",\"type\":\"Sub\"}]}", + parsingForm); + } + + @Test + void namespaceAndNameRules() { + Protocol p1 = new Protocol("P", null, "foo"); + Protocol p2 = new Protocol("foo.P", null, null); + Protocol p3 = new Protocol("foo.P", null, "bar"); + assertEquals(p1.getName(), p2.getName()); + assertEquals(p1.getNamespace(), p2.getNamespace()); + assertEquals(p1.getName(), p3.getName()); + assertEquals(p1.getNamespace(), p3.getNamespace()); + + // The following situation is allowed, even if confusing, because the + // specification describes this algorithm without specifying that the resulting + // namespace mst be non-empty. + Protocol invalidName = new Protocol(".P", null, "ignored"); + assertNull(invalidName.getNamespace()); + assertEquals("P", invalidName.getName()); + } + + @Test + void propEquals() { Protocol p1 = new Protocol("P", null, "foo"); p1.addProp("a", "1"); Protocol p2 = new Protocol("P", null, "foo"); p2.addProp("a", "2"); - assertFalse(p1.equals(p2)); + assertNotEquals(p1, p2); } @Test - public void testSplitProtocolBuild() { + void splitProtocolBuild() { Protocol p = new Protocol("P", null, "foo"); p.addProp("property", "some value"); @@ -48,4 +140,17 @@ public void testSplitProtocolBuild() { assertNotNull(parsedArrayOfStringProtocol); assertEquals(parsedStringProtocol.toString(), parsedArrayOfStringProtocol.toString()); } + + @Test + void copyMessage() { + Protocol p = new Protocol("P", "protocol", "foo"); + Schema req1 = SchemaBuilder.record("foo.req1").fields().endRecord(); + Protocol.Message m1 = p.createMessage("M", "message", singletonMap("foo", "bar"), req1); + Schema req2 = SchemaBuilder.record("foo.req2").fields().name("test").type().booleanType().noDefault().endRecord(); + + Protocol.Message m2 = p.createMessage(m1, req2); + assertEquals(m1.getName(), m2.getName()); + assertEquals(m1.getDoc(), m2.getDoc()); + assertEquals(m1.getProp("foo"), m2.getProp("foo")); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java index 47cafcec189..9fc764af972 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java @@ -17,16 +17,15 @@ */ package org.apache.avro; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collection; + +import static java.util.Collections.emptyList; +import static org.junit.jupiter.api.Assertions.*; + import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.EnumSymbol; import org.apache.avro.generic.GenericData.Record; @@ -38,24 +37,16 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + public class TestReadingWritingDataInEvolvedSchemas { private static final String RECORD_A = "RecordA"; private static final String FIELD_A = "fieldA"; private static final char LATIN_SMALL_LETTER_O_WITH_DIARESIS = '\u00F6'; - @Rule - public ExpectedException expectedException = ExpectedException.none(); - private static final Schema DOUBLE_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // .name(FIELD_A).type().doubleType().noDefault() // @@ -89,13 +80,18 @@ public class TestReadingWritingDataInEvolvedSchemas { .fields() // .name(FIELD_A).type().unionOf().stringType().and().bytesType().endUnion().noDefault() // .endRecord(); + + private static final Schema ENUM_AB = SchemaBuilder.enumeration("Enum1").symbols("A", "B"); + private static final Schema ENUM_AB_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // - .name(FIELD_A).type().enumeration("Enum1").symbols("A", "B").noDefault() // + .name(FIELD_A).type(ENUM_AB).noDefault() // .endRecord(); + + private static final Schema ENUM_ABC = SchemaBuilder.enumeration("Enum1").symbols("A", "B", "C"); private static final Schema ENUM_ABC_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // - .name(FIELD_A).type().enumeration("Enum1").symbols("A", "B", "C").noDefault() // + .name(FIELD_A).type(ENUM_ABC).noDefault() // .endRecord(); private static final Schema UNION_INT_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // @@ -122,221 +118,240 @@ public class TestReadingWritingDataInEvolvedSchemas { .name(FIELD_A).type().unionOf().floatType().and().doubleType().endUnion().noDefault() // .endRecord(); - @Parameters(name = "encoder = {0}") - public static Collection data() { - return Arrays.asList(new EncoderType[][] { { EncoderType.BINARY }, { EncoderType.JSON } }); - } - - public TestReadingWritingDataInEvolvedSchemas(EncoderType encoderType) { - this.encoderType = encoderType; - } - - private final EncoderType encoderType; + private static final Schema UNION_WITH_EMPTY_ARRAY_DEFAULT_RECORD = SchemaBuilder.record(RECORD_A) // + .fields() // + .name(FIELD_A).type().unionOf().array().items(INT_RECORD).and().nullType().endUnion().arrayDefault(emptyList()) // + .endRecord(); enum EncoderType { BINARY, JSON } - @Test - public void doubleWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void doubleWrittenWithUnionSchemaIsConvertedToDoubleSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42.0); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToUnionLongFloatSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToUnionLongFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42L, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToDoubleSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0, decoded.get(FIELD_A)); } - @Test - public void intWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsConvertedToDoubleSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0, decoded.get(FIELD_A)); } - @Test - public void intWrittenWithUnionSchemaIsReadableByFloatSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsReadableByFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void intWrittenWithUnionSchemaIsReadableByFloatUnionSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsReadableByFloatUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsReadableByFloatSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsReadableByFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsReadableByFloatUnionSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsReadableByFloatUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToLongFloatUnionSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToLongFloatUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42L, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToFloatDoubleUnionSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToFloatDoubleUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_FLOAT_DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_FLOAT_DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0F, decoded.get(FIELD_A)); } - @Test - public void doubleWrittenWithUnionSchemaIsNotConvertedToFloatSchema() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found double, expecting float"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void doubleWrittenWithUnionSchemaIsNotConvertedToFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42.0); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType)); + assertEquals("Found double, expecting float", exception.getMessage()); } - @Test - public void floatWrittenWithUnionSchemaIsNotConvertedToLongSchema() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found float, expecting long"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void floatWrittenWithUnionSchemaIsNotConvertedToLongSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42.0f); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(LONG_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(LONG_RECORD, writer, encoded, encoderType)); + assertEquals("Found float, expecting long", exception.getMessage()); } - @Test - public void longWrittenWithUnionSchemaIsNotConvertedToIntSchema() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found long, expecting int"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsNotConvertedToIntSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(INT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(INT_RECORD, writer, encoded, encoderType)); + assertEquals("Found long, expecting int", exception.getMessage()); } - @Test - public void intWrittenWithUnionSchemaIsConvertedToAllNumberSchemas() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsConvertedToAllNumberSchemas(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - assertEquals(42.0, decodeGenericBlob(DOUBLE_RECORD, writer, encoded).get(FIELD_A)); - assertEquals(42.0f, decodeGenericBlob(FLOAT_RECORD, writer, encoded).get(FIELD_A)); - assertEquals(42L, decodeGenericBlob(LONG_RECORD, writer, encoded).get(FIELD_A)); - assertEquals(42, decodeGenericBlob(INT_RECORD, writer, encoded).get(FIELD_A)); + byte[] encoded = encodeGenericBlob(record, encoderType); + assertEquals(42.0, decodeGenericBlob(DOUBLE_RECORD, writer, encoded, encoderType).get(FIELD_A)); + assertEquals(42.0f, decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType).get(FIELD_A)); + assertEquals(42L, decodeGenericBlob(LONG_RECORD, writer, encoded, encoderType).get(FIELD_A)); + assertEquals(42, decodeGenericBlob(INT_RECORD, writer, encoded, encoderType).get(FIELD_A)); } - @Test - public void asciiStringWrittenWithUnionSchemaIsConvertedToBytesSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void asciiStringWrittenWithUnionSchemaIsConvertedToBytesSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_STRING_BYTES_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, "42"); - byte[] encoded = encodeGenericBlob(record); - ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded, encoderType).get(FIELD_A); assertArrayEquals("42".getBytes(StandardCharsets.UTF_8), actual.array()); } - @Test - public void utf8StringWrittenWithUnionSchemaIsConvertedToBytesSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void utf8StringWrittenWithUnionSchemaIsConvertedToBytesSchema(EncoderType encoderType) throws Exception { String goeran = String.format("G%sran", LATIN_SMALL_LETTER_O_WITH_DIARESIS); Schema writer = UNION_STRING_BYTES_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, goeran); - byte[] encoded = encodeGenericBlob(record); - ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded, encoderType).get(FIELD_A); assertArrayEquals(goeran.getBytes(StandardCharsets.UTF_8), actual.array()); } - @Test - public void asciiBytesWrittenWithUnionSchemaIsConvertedToStringSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void asciiBytesWrittenWithUnionSchemaIsConvertedToStringSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_STRING_BYTES_RECORD; ByteBuffer buf = ByteBuffer.wrap("42".getBytes(StandardCharsets.UTF_8)); Record record = defaultRecordWithSchema(writer, FIELD_A, buf); - byte[] encoded = encodeGenericBlob(record); - CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded, encoderType).get(FIELD_A); assertEquals("42", read.toString()); } - @Test - public void utf8BytesWrittenWithUnionSchemaIsConvertedToStringSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void utf8BytesWrittenWithUnionSchemaIsConvertedToStringSchema(EncoderType encoderType) throws Exception { String goeran = String.format("G%sran", LATIN_SMALL_LETTER_O_WITH_DIARESIS); Schema writer = UNION_STRING_BYTES_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, goeran); - byte[] encoded = encodeGenericBlob(record); - CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded, encoderType).get(FIELD_A); assertEquals(goeran, read.toString()); } - @Test - public void enumRecordCanBeReadWithExtendedEnumSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void enumRecordCanBeReadWithExtendedEnumSchema(EncoderType encoderType) throws Exception { Schema writer = ENUM_AB_RECORD; - Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "A")); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(ENUM_ABC_RECORD, writer, encoded); + Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_AB, "A")); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(ENUM_ABC_RECORD, writer, encoded, encoderType); assertEquals("A", decoded.get(FIELD_A).toString()); } - @Test - public void enumRecordWithExtendedSchemaCanBeReadWithOriginalEnumSchemaIfOnlyOldValues() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void enumRecordWithExtendedSchemaCanBeReadWithOriginalEnumSchemaIfOnlyOldValues(EncoderType encoderType) + throws Exception { Schema writer = ENUM_ABC_RECORD; - Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "A")); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(ENUM_AB_RECORD, writer, encoded); + Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_ABC, "A")); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(ENUM_AB_RECORD, writer, encoded, encoderType); assertEquals("A", decoded.get(FIELD_A).toString()); } - @Test - public void enumRecordWithExtendedSchemaCanNotBeReadIfNewValuesAreUsed() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("No match for C"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void enumRecordWithExtendedSchemaCanNotBeReadIfNewValuesAreUsed(EncoderType encoderType) throws Exception { Schema writer = ENUM_ABC_RECORD; - Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "C")); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(ENUM_AB_RECORD, writer, encoded); + Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_ABC, "C")); + byte[] encoded = encodeGenericBlob(record, encoderType); + + AvroTypeException exception = assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(ENUM_AB_RECORD, writer, encoded, encoderType)); + assertEquals("No match for C", exception.getMessage()); } - @Test - public void recordWrittenWithExtendedSchemaCanBeReadWithOriginalSchemaButLossOfData() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void recordWrittenWithExtendedSchemaCanBeReadWithOriginalSchemaButLossOfData(EncoderType encoderType) + throws Exception { Schema writer = SchemaBuilder.record(RECORD_A) // .fields() // .name("newTopField").type().stringType().noDefault() // @@ -344,57 +359,71 @@ public void recordWrittenWithExtendedSchemaCanBeReadWithOriginalSchemaButLossOfD .endRecord(); Record record = defaultRecordWithSchema(writer, FIELD_A, 42); record.put("newTopField", "not decoded"); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(INT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(INT_RECORD, writer, encoded, encoderType); assertEquals(42, decoded.get(FIELD_A)); try { decoded.get("newTopField"); - Assert.fail("get should throw a exception"); + fail("get should throw a exception"); } catch (AvroRuntimeException ex) { - Assert.assertEquals("Not a valid schema field: newTopField", ex.getMessage()); + assertEquals("Not a valid schema field: newTopField", ex.getMessage()); } } - @Test - public void readerWithoutDefaultValueThrowsException() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("missing required field newField"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void readerWithoutDefaultValueThrowsException(EncoderType encoderType) throws Exception { Schema reader = SchemaBuilder.record(RECORD_A) // .fields() // .name("newField").type().intType().noDefault() // .name(FIELD_A).type().intType().noDefault() // .endRecord(); Record record = defaultRecordWithSchema(INT_RECORD, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(reader, INT_RECORD, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(reader, INT_RECORD, encoded, encoderType)); + assertTrue(exception.getMessage().contains("missing required field newField"), exception.getMessage()); } - @Test - public void readerWithDefaultValueIsApplied() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void readerWithDefaultValueIsApplied(EncoderType encoderType) throws Exception { Schema reader = SchemaBuilder.record(RECORD_A) // .fields() // .name("newFieldWithDefault").type().intType().intDefault(314) // .name(FIELD_A).type().intType().noDefault() // .endRecord(); Record record = defaultRecordWithSchema(INT_RECORD, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(reader, INT_RECORD, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(reader, INT_RECORD, encoded, encoderType); assertEquals(42, decoded.get(FIELD_A)); assertEquals(314, decoded.get("newFieldWithDefault")); } - @Test - public void aliasesInSchema() throws Exception { - Schema writer = new Schema.Parser() - .parse("{\"namespace\": \"example.avro\", \"type\": \"record\", \"name\": \"User\", \"fields\": [" - + "{\"name\": \"name\", \"type\": \"int\"}\n" + "]}\n"); - Schema reader = new Schema.Parser() - .parse("{\"namespace\": \"example.avro\", \"type\": \"record\", \"name\": \"User\", \"fields\": [" - + "{\"name\": \"fname\", \"type\": \"int\", \"aliases\" : [ \"name\" ]}\n" + "]}\n"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void readerWithEmptyListAsDefaultValueForUnionFieldIsApplied(EncoderType encoderType) throws Exception { + Schema writer = SchemaBuilder.record(RECORD_A) // + .fields() // + .endRecord(); + Record record = new GenericData.Record(writer); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_WITH_EMPTY_ARRAY_DEFAULT_RECORD, writer, encoded, encoderType); + assertEquals(emptyList(), decoded.get(FIELD_A)); + } + + @ParameterizedTest + @EnumSource(EncoderType.class) + void aliasesInSchema(EncoderType encoderType) throws Exception { + Schema writer = SchemaParser.parseSingle("{\"namespace\": \"example.avro\", \"type\": \"record\", " + + "\"name\": \"User\", \"fields\": [{\"name\": \"name\", \"type\": \"int\"}\n" + "]}\n"); + Schema reader = SchemaParser.parseSingle("{\"namespace\": \"example.avro\", \"type\": \"record\", " + + "\"name\": \"User\", \"fields\": [{\"name\": \"fname\", \"type\": \"int\", \"aliases\" : [ \"name\" ]}\n" + + "]}\n"); GenericData.Record record = defaultRecordWithSchema(writer, "name", 1); - byte[] encoded = encodeGenericBlob(record); - GenericData.Record decoded = decodeGenericBlob(reader, reader, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + GenericData.Record decoded = decodeGenericBlob(reader, reader, encoded, encoderType); assertEquals(1, decoded.get("fname")); } @@ -405,7 +434,7 @@ private Record defaultRecordWithSchema(Schema schema, String key, T value) { return data; } - private byte[] encodeGenericBlob(GenericRecord data) throws IOException { + private byte[] encodeGenericBlob(GenericRecord data, EncoderType encoderType) throws IOException { DatumWriter writer = new GenericDatumWriter<>(data.getSchema()); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); Encoder encoder = encoderType == EncoderType.BINARY ? EncoderFactory.get().binaryEncoder(outStream, null) @@ -416,11 +445,14 @@ private byte[] encodeGenericBlob(GenericRecord data) throws IOException { return outStream.toByteArray(); } - private Record decodeGenericBlob(Schema expectedSchema, Schema schemaOfBlob, byte[] blob) throws IOException { + private Record decodeGenericBlob(Schema expectedSchema, Schema schemaOfBlob, byte[] blob, EncoderType encoderType) + throws IOException { if (blob == null) { return null; } - GenericDatumReader reader = new GenericDatumReader<>(); + GenericData data = new GenericData(); + data.setFastReaderEnabled(true); + GenericDatumReader reader = new GenericDatumReader<>(null, null, data); reader.setExpected(expectedSchema); reader.setSchema(schemaOfBlob); Decoder decoder = encoderType == EncoderType.BINARY ? DecoderFactory.get().binaryDecoder(blob, null) diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestResolver.java b/lang/java/avro/src/test/java/org/apache/avro/TestResolver.java new file mode 100644 index 00000000000..1d3919319af --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestResolver.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.avro.data.TimeConversions; +import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.FastReaderBuilder; +import org.apache.avro.io.JsonDecoder; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class TestResolver { + + /** + * Test promote action INT -> LONG, with logical type for LONG. + */ + @Test + void resolveTime() { + final Schema writeSchema = Schema.create(Schema.Type.INT); + final Schema readSchema = new TimeConversions.TimeMicrosConversion().getRecommendedSchema(); // LONG + + Resolver.Action action = Resolver.resolve(writeSchema, readSchema); + Assertions.assertNotNull(action); + MatcherAssert.assertThat("Wrong class for action", action, Matchers.instanceOf(Resolver.Promote.class)); + Assertions.assertEquals(action.type, Resolver.Action.Type.PROMOTE); + Assertions.assertNotNull(action.logicalType); + } + + /** + * Test union type with promote action INT -> LONG, with logical type for LONG. + */ + @Test + void resolveUnion() { + final Schema schema = new TimeConversions.TimeMicrosConversion().getRecommendedSchema(); + + final Schema writeSchema = Schema.createUnion(Schema.create(Schema.Type.INT)); + final Schema readSchema = Schema.createUnion(schema); + + Resolver.Action action = Resolver.resolve(writeSchema, readSchema); + Assertions.assertNotNull(action); + Assertions.assertEquals(action.type, Resolver.Action.Type.WRITER_UNION); + MatcherAssert.assertThat("Wrong class for action", action, Matchers.instanceOf(Resolver.WriterUnion.class)); + + Assertions.assertEquals(1, ((Resolver.WriterUnion) action).actions.length); + Resolver.Action innerAction = ((Resolver.WriterUnion) action).actions[0]; + + MatcherAssert.assertThat("Wrong class for action", innerAction, Matchers.instanceOf(Resolver.ReaderUnion.class)); + Resolver.ReaderUnion innerUnionAction = (Resolver.ReaderUnion) innerAction; + Resolver.Action promoteAction = innerUnionAction.actualAction; + Assertions.assertEquals(promoteAction.type, Resolver.Action.Type.PROMOTE); + Assertions.assertNotNull(promoteAction.logicalType); + } + + @Test + void resolveEnum() throws IOException { + final Schema writeSchema = Schema.createEnum("myEnum", "", "n1", Arrays.asList("e1", "e3", "e4")); + final Schema readSchema = Schema.createEnum("myEnum", "", "n1", Arrays.asList("e1", "e2", "e3"), "e2"); + + Resolver.Action action = Resolver.resolve(writeSchema, readSchema); + Assertions.assertNotNull(action); + Assertions.assertEquals(action.type, Resolver.Action.Type.ENUM); + MatcherAssert.assertThat("Wrong class for action", action, Matchers.instanceOf(Resolver.EnumAdjust.class)); + Resolver.EnumAdjust adjust = (Resolver.EnumAdjust) action; + + Assertions.assertArrayEquals(new int[] { 0, 2, 1 }, adjust.adjustments); + Assertions.assertEquals("e1", adjust.values[0].toString()); + Assertions.assertEquals("e3", adjust.values[1].toString()); + Assertions.assertEquals("e2", adjust.values[2].toString()); + + FastReaderBuilder reader = FastReaderBuilder.get(); + Schema writeRecord = Schema.createRecord("rec1", "", "", false, + Arrays.asList(new Schema.Field("f1", writeSchema, ""))); + Schema readRecord = Schema.createRecord("rec1", "", "", false, + Arrays.asList(new Schema.Field("f1", readSchema, ""))); + DatumReader datumReader = reader.createDatumReader(writeRecord, readRecord); + JsonDecoder e2 = DecoderFactory.get().jsonDecoder(readRecord, "{ \"f1\" : \"e2\" }"); + Object read = datumReader.read(null, e2); + Assertions.assertNotNull(read); + MatcherAssert.assertThat("", read, Matchers.instanceOf(IndexedRecord.class)); + IndexedRecord result = (IndexedRecord) read; + Assertions.assertEquals("e3", result.get(0).toString()); + } + + @Test + void promoteIsValid() { + Assertions.assertThrows(IllegalArgumentException.class, + () -> Resolver.Promote.isValid(Schema.create(Schema.Type.INT), Schema.create(Schema.Type.INT))); + + Assertions.assertTrue(Resolver.Promote.isValid(Schema.create(Schema.Type.INT), Schema.create(Schema.Type.LONG))); + Assertions.assertFalse(Resolver.Promote.isValid(Schema.create(Schema.Type.LONG), Schema.create(Schema.Type.INT))); + + Assertions.assertTrue(Resolver.Promote.isValid(Schema.create(Schema.Type.INT), Schema.create(Schema.Type.FLOAT))); + Assertions.assertFalse(Resolver.Promote.isValid(Schema.create(Schema.Type.FLOAT), Schema.create(Schema.Type.INT))); + + Assertions + .assertTrue(Resolver.Promote.isValid(Schema.create(Schema.Type.FLOAT), Schema.create(Schema.Type.DOUBLE))); + Assertions + .assertFalse(Resolver.Promote.isValid(Schema.create(Schema.Type.DOUBLE), Schema.create(Schema.Type.FLOAT))); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java index 4b2a78bc8cb..1920633941c 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java @@ -17,42 +17,77 @@ */ package org.apache.avro; -import static org.junit.Assert.*; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; - +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.IntNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.NullNode; +import com.fasterxml.jackson.databind.node.TextNode; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; import org.apache.avro.generic.GenericData; -import org.junit.Test; +import org.apache.avro.generic.GenericData.EnumSymbol; +import org.apache.avro.generic.GenericData.Record; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import static java.util.Objects.requireNonNull; +import static org.apache.avro.JsonSchemaParser.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; public class TestSchema { @Test - public void testSplitSchemaBuild() { + void splitSchemaBuild() { Schema s = SchemaBuilder.record("HandshakeRequest").namespace("org.apache.avro.ipc").fields().name("clientProtocol") .type().optional().stringType().name("meta").type().optional().map().values().bytesType().endRecord(); String schemaString = s.toString(); int mid = schemaString.length() / 2; - Schema parsedStringSchema = new org.apache.avro.Schema.Parser().parse(s.toString()); - Schema parsedArrayOfStringSchema = new org.apache.avro.Schema.Parser().parse(schemaString.substring(0, mid), - schemaString.substring(mid)); + // Use the internal parser: the use case for split string schemas is the + // SCHEMA$ constant in compiled schemas (implementing SpecificRecord). + Schema parsedStringSchema = parseInternal(s.toString()); + Schema parsedArrayOfStringSchema = parseInternal(schemaString.substring(0, mid), schemaString.substring(mid)); assertNotNull(parsedStringSchema); assertNotNull(parsedArrayOfStringSchema); assertEquals(parsedStringSchema.toString(), parsedArrayOfStringSchema.toString()); } @Test - public void testDefaultRecordWithDuplicateFieldName() { + void defaultRecordWithDuplicateFieldName() { String recordName = "name"; Schema schema = Schema.createRecord(recordName, "doc", "namespace", false); List fields = new ArrayList<>(); @@ -67,7 +102,7 @@ public void testDefaultRecordWithDuplicateFieldName() { } @Test - public void testCreateUnionVarargs() { + void createUnionVarargs() { List types = new ArrayList<>(); types.add(Schema.create(Type.NULL)); types.add(Schema.create(Type.LONG)); @@ -78,33 +113,35 @@ public void testCreateUnionVarargs() { } @Test - public void testRecordWithNullDoc() { + void recordWithNullDoc() { Schema schema = Schema.createRecord("name", null, "namespace", false); String schemaString = schema.toString(); assertNotNull(schemaString); } @Test - public void testRecordWithNullNamespace() { + void recordWithNullNamespace() { Schema schema = Schema.createRecord("name", "doc", null, false); String schemaString = schema.toString(); assertNotNull(schemaString); } @Test - public void testEmptyRecordSchema() { + void emptyRecordSchema() { Schema schema = createDefaultRecord(); String schemaString = schema.toString(); assertNotNull(schemaString); } - @Test(expected = SchemaParseException.class) - public void testParseEmptySchema() { - new Schema.Parser().parse(""); + @Test + void parseEmptySchema() { + assertThrows(SchemaParseException.class, () -> { + SchemaParser.parseSingle(""); + }); } @Test - public void testSchemaWithFields() { + void schemaWithFields() { List fields = new ArrayList<>(); fields.add(new Field("field_name1", Schema.create(Type.NULL), null, null)); fields.add(new Field("field_name2", Schema.create(Type.INT), null, null)); @@ -115,67 +152,69 @@ public void testSchemaWithFields() { assertEquals(2, schema.getFields().size()); } - @Test(expected = NullPointerException.class) - public void testSchemaWithNullFields() { - Schema.createRecord("name", "doc", "namespace", false, null); + @Test + void schemaWithNullFields() { + assertThrows(NullPointerException.class, () -> { + Schema.createRecord("name", "doc", "namespace", false, null); + }); } @Test - public void testIsUnionOnUnionWithMultipleElements() { + void isUnionOnUnionWithMultipleElements() { Schema schema = Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.LONG)); assertTrue(schema.isUnion()); } @Test - public void testIsUnionOnUnionWithOneElement() { + void isUnionOnUnionWithOneElement() { Schema schema = Schema.createUnion(Schema.create(Type.LONG)); assertTrue(schema.isUnion()); } @Test - public void testIsUnionOnRecord() { + void isUnionOnRecord() { Schema schema = createDefaultRecord(); assertFalse(schema.isUnion()); } @Test - public void testIsUnionOnArray() { + void isUnionOnArray() { Schema schema = Schema.createArray(Schema.create(Type.LONG)); assertFalse(schema.isUnion()); } @Test - public void testIsUnionOnEnum() { + void isUnionOnEnum() { Schema schema = Schema.createEnum("name", "doc", "namespace", Collections.singletonList("value")); assertFalse(schema.isUnion()); } @Test - public void testIsUnionOnFixed() { + void isUnionOnFixed() { Schema schema = Schema.createFixed("name", "doc", "space", 10); assertFalse(schema.isUnion()); } @Test - public void testIsUnionOnMap() { + void isUnionOnMap() { Schema schema = Schema.createMap(Schema.create(Type.LONG)); assertFalse(schema.isUnion()); } @Test - public void testIsNullableOnUnionWithNull() { + void isNullableOnUnionWithNull() { Schema schema = Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.LONG)); assertTrue(schema.isNullable()); } @Test - public void testIsNullableOnUnionWithoutNull() { + void isNullableOnUnionWithoutNull() { Schema schema = Schema.createUnion(Schema.create(Type.LONG)); assertFalse(schema.isNullable()); } @Test - public void testIsNullableOnRecord() { + void isNullableOnRecord() { Schema schema = createDefaultRecord(); assertFalse(schema.isNullable()); } @@ -185,12 +224,12 @@ private Schema createDefaultRecord() { } @Test - public void testSerialization() throws IOException, ClassNotFoundException { + void serialization() throws IOException, ClassNotFoundException { try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(bos); InputStream jsonSchema = getClass().getResourceAsStream("/SchemaBuilder.avsc")) { - Schema payload = new Schema.Parser().parse(jsonSchema); + Schema payload = new SchemaParser().parse(jsonSchema).mainSchema(); oos.writeObject(payload); try (ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); @@ -202,14 +241,14 @@ public void testSerialization() throws IOException, ClassNotFoundException { } @Test - public void testReconstructSchemaStringWithoutInlinedChildReference() { + void reconstructSchemaStringWithoutInlinedChildReference() { String child = "{\"type\":\"record\"," + "\"name\":\"Child\"," + "\"namespace\":\"org.apache.avro.nested\"," + "\"fields\":" + "[{\"name\":\"childField\",\"type\":\"string\"}]}"; String parent = "{\"type\":\"record\"," + "\"name\":\"Parent\"," + "\"namespace\":\"org.apache.avro.nested\"," + "\"fields\":" + "[{\"name\":\"child\",\"type\":\"Child\"}]}"; - Schema.Parser parser = new Schema.Parser(); - Schema childSchema = parser.parse(child); - Schema parentSchema = parser.parse(parent); + SchemaParser parser = new SchemaParser(); + Schema childSchema = parser.parse(child).mainSchema(); + Schema parentSchema = parser.parse(parent).mainSchema(); String parentWithoutInlinedChildReference = parentSchema.toString(Collections.singleton(childSchema), false); // The generated string should be the same as the original parent // schema string that did not have the child schema inlined. @@ -217,7 +256,7 @@ public void testReconstructSchemaStringWithoutInlinedChildReference() { } @Test - public void testIntDefaultValue() { + void intDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", 1); assertTrue(field.hasDefaultValue()); assertEquals(1, field.defaultVal()); @@ -235,7 +274,7 @@ public void testIntDefaultValue() { } @Test - public void testValidLongAsIntDefaultValue() { + void validLongAsIntDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", 1L); assertTrue(field.hasDefaultValue()); assertEquals(1, field.defaultVal()); @@ -252,18 +291,22 @@ public void testValidLongAsIntDefaultValue() { assertEquals(Integer.MAX_VALUE, GenericData.get().getDefaultValue(field)); } - @Test(expected = AvroTypeException.class) - public void testInvalidLongAsIntDefaultValue() { - new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", Integer.MAX_VALUE + 1L); + @Test + void invalidLongAsIntDefaultValue() { + assertThrows(AvroTypeException.class, () -> { + new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", Integer.MAX_VALUE + 1L); + }); } - @Test(expected = AvroTypeException.class) - public void testDoubleAsIntDefaultValue() { - new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", 1.0); + @Test + void doubleAsIntDefaultValue() { + assertThrows(AvroTypeException.class, () -> { + new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", 1.0); + }); } @Test - public void testLongDefaultValue() { + void longDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.LONG), "doc", 1L); assertTrue(field.hasDefaultValue()); assertEquals(1L, field.defaultVal()); @@ -281,20 +324,22 @@ public void testLongDefaultValue() { } @Test - public void testIntAsLongDefaultValue() { + void intAsLongDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.LONG), "doc", 1); assertTrue(field.hasDefaultValue()); assertEquals(1L, field.defaultVal()); assertEquals(1L, GenericData.get().getDefaultValue(field)); } - @Test(expected = AvroTypeException.class) - public void testDoubleAsLongDefaultValue() { - new Schema.Field("myField", Schema.create(Schema.Type.LONG), "doc", 1.0); + @Test + void doubleAsLongDefaultValue() { + assertThrows(AvroTypeException.class, () -> { + new Schema.Field("myField", Schema.create(Schema.Type.LONG), "doc", 1.0); + }); } @Test - public void testDoubleDefaultValue() { + void doubleDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.DOUBLE), "doc", 1.0); assertTrue(field.hasDefaultValue()); assertEquals(1.0d, field.defaultVal()); @@ -302,7 +347,7 @@ public void testDoubleDefaultValue() { } @Test - public void testIntAsDoubleDefaultValue() { + void intAsDoubleDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.DOUBLE), "doc", 1); assertTrue(field.hasDefaultValue()); assertEquals(1.0d, field.defaultVal()); @@ -310,7 +355,7 @@ public void testIntAsDoubleDefaultValue() { } @Test - public void testLongAsDoubleDefaultValue() { + void longAsDoubleDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.DOUBLE), "doc", 1L); assertTrue(field.hasDefaultValue()); assertEquals(1.0d, field.defaultVal()); @@ -318,7 +363,7 @@ public void testLongAsDoubleDefaultValue() { } @Test - public void testFloatAsDoubleDefaultValue() { + void floatAsDoubleDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.DOUBLE), "doc", 1.0f); assertTrue(field.hasDefaultValue()); assertEquals(1.0d, field.defaultVal()); @@ -326,7 +371,7 @@ public void testFloatAsDoubleDefaultValue() { } @Test - public void testFloatDefaultValue() { + void floatDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.FLOAT), "doc", 1.0f); assertTrue(field.hasDefaultValue()); assertEquals(1.0f, field.defaultVal()); @@ -334,7 +379,7 @@ public void testFloatDefaultValue() { } @Test - public void testIntAsFloatDefaultValue() { + void intAsFloatDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.FLOAT), "doc", 1); assertTrue(field.hasDefaultValue()); assertEquals(1.0f, field.defaultVal()); @@ -342,7 +387,7 @@ public void testIntAsFloatDefaultValue() { } @Test - public void testLongAsFloatDefaultValue() { + void longAsFloatDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.FLOAT), "doc", 1L); assertTrue(field.hasDefaultValue()); assertEquals(1.0f, field.defaultVal()); @@ -350,15 +395,297 @@ public void testLongAsFloatDefaultValue() { } @Test - public void testDoubleAsFloatDefaultValue() { + void doubleAsFloatDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.FLOAT), "doc", 1.0d); assertTrue(field.hasDefaultValue()); assertEquals(1.0f, field.defaultVal()); assertEquals(1.0f, GenericData.get().getDefaultValue(field)); } - @Test(expected = SchemaParseException.class) - public void testEnumSymbolAsNull() { - Schema.createEnum("myField", "doc", "namespace", Collections.singletonList(null)); + @Test + void enumSymbolAsNull() { + assertThrows(SchemaParseException.class, () -> { + Schema.createEnum("myField", "doc", "namespace", Collections.singletonList(null)); + }); + } + + @Test + void schemaFieldWithoutSchema() { + assertThrows(NullPointerException.class, () -> { + new Schema.Field("f", null); + }); + } + + @Test + void parseRecordWithNameAsType() { + final String schemaString = "{\n \"type\" : \"record\",\n \"name\" : \"ns.int\",\n" + + " \"fields\" : [ \n {\"name\" : \"value\", \"type\" : \"int\"}, \n" + + " {\"name\" : \"next\", \"type\" : [ \"null\", \"ns.int\" ]}\n ]\n}"; + final Schema schema = SchemaParser.parseSingle(schemaString); + String toString = schema.toString(true); + + final Schema schema2 = SchemaParser.parseSingle(toString); + assertEquals(schema, schema2); + } + + @Test + void qualifiedName() { + Arrays.stream(Type.values()).forEach((Type t) -> { + final Schema.Name name = new Schema.Name(t.getName(), "space"); + assertEquals("space." + t.getName(), name.getQualified("space")); + assertEquals("space." + t.getName(), name.getQualified("otherdefault")); + }); + final Schema.Name name = new Schema.Name("name", "space"); + assertEquals("name", name.getQualified("space")); + assertEquals("space.name", name.getQualified("otherdefault")); + + final Schema.Name nameInt = new Schema.Name("Int", "space"); + assertEquals("Int", nameInt.getQualified("space")); + } + + @Test + void validValue() { + // Valid null value + final Schema nullSchema = Schema.create(Type.NULL); + assertTrue(nullSchema.isValidDefault(JsonNodeFactory.instance.nullNode())); + + // Valid int value + final Schema intSchema = Schema.create(Type.INT); + assertTrue(intSchema.isValidDefault(JsonNodeFactory.instance.numberNode(12))); + + // Valid Text value + final Schema strSchema = Schema.create(Type.STRING); + assertTrue(strSchema.isValidDefault(new TextNode("textNode"))); + + // Valid Array value + final Schema arraySchema = Schema.createArray(Schema.create(Type.STRING)); + final ArrayNode arrayValue = JsonNodeFactory.instance.arrayNode(); + assertTrue(arraySchema.isValidDefault(arrayValue)); // empty array + + arrayValue.add("Hello"); + arrayValue.add("World"); + assertTrue(arraySchema.isValidDefault(arrayValue)); + + arrayValue.add(5); + assertFalse(arraySchema.isValidDefault(arrayValue)); + + // Valid Union type + final Schema unionSchema = Schema.createUnion(strSchema, intSchema, nullSchema); + assertTrue(unionSchema.isValidDefault(JsonNodeFactory.instance.textNode("Hello"))); + assertTrue(unionSchema.isValidDefault(new IntNode(23))); + assertTrue(unionSchema.isValidDefault(JsonNodeFactory.instance.nullNode())); + + assertFalse(unionSchema.isValidDefault(arrayValue)); + + // Array of union + final Schema arrayUnion = Schema.createArray(unionSchema); + final ArrayNode arrayUnionValue = JsonNodeFactory.instance.arrayNode(); + arrayUnionValue.add("Hello"); + arrayUnionValue.add(NullNode.getInstance()); + assertTrue(arrayUnion.isValidDefault(arrayUnionValue)); + + // Union String, bytes + final Schema unionStrBytes = Schema.createUnion(strSchema, Schema.create(Type.BYTES)); + assertTrue(unionStrBytes.isValidDefault(JsonNodeFactory.instance.textNode("Hello"))); + assertFalse(unionStrBytes.isValidDefault(JsonNodeFactory.instance.numberNode(123))); + } + + @Test + void enumLateDefine() { + String schemaString = "{\n" + " \"type\":\"record\",\n" + " \"name\": \"Main\",\n" + " \"fields\":[\n" + + " {\n" + " \"name\":\"f1\",\n" + " \"type\":\"Sub\"\n" + " },\n" + + " {\n" + " \"name\":\"f2\",\n" + " \"type\":{\n" + + " \"type\":\"enum\",\n" + " \"name\":\"Sub\",\n" + + " \"symbols\":[\"OPEN\",\"CLOSE\"]\n" + " }\n" + " }\n" + " ]\n" + "}"; + + final Schema schema = SchemaParser.parseSingle(schemaString); + Schema f1Schema = schema.getField("f1").schema(); + Schema f2Schema = schema.getField("f2").schema(); + assertSame(f1Schema, f2Schema); + assertEquals(Type.ENUM, f1Schema.getType()); + String stringSchema = schema.toString(); + int definitionIndex = stringSchema.indexOf("\"symbols\":[\"OPEN\",\"CLOSE\"]"); + int usageIndex = stringSchema.indexOf("\"type\":\"Sub\""); + assertTrue(definitionIndex < usageIndex, "usage is before definition"); + } + + @Test + public void testRecordInArray() { + String schemaString = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"TestRecord\",\n" + " \"fields\": [\n" + + " {\n" + " \"name\": \"value\",\n" + " \"type\": {\n" + " \"type\": \"record\",\n" + + " \"name\": \"Container\",\n" + " \"fields\": [\n" + " {\n" + + " \"name\": \"Optional\",\n" + " \"type\": {\n" + " \"type\": \"array\",\n" + + " \"items\": [\n" + " {\n" + " \"type\": \"record\",\n" + + " \"name\": \"optional_field_0\",\n" + " \"namespace\": \"\",\n" + + " \"doc\": \"\",\n" + " \"fields\": [\n" + " {\n" + + " \"name\": \"optional_field_1\",\n" + " \"type\": \"long\",\n" + + " \"doc\": \"\",\n" + " \"default\": 0\n" + + " }\n" + " ]\n" + " }\n" + " ]\n" + + " }\n" + " }\n" + " ]\n" + " }\n" + " }\n" + " ]\n" + "}"; + final Schema schema = SchemaParser.parseSingle(schemaString); + assertNotNull(schema); + } + + /* + * @Test public void testRec() { String schemaString = + * "[{\"name\":\"employees\",\"type\":[\"null\",{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Pair1081149ea1d6eb80\",\"fields\":[{\"name\":\"key\",\"type\":\"int\"},{\"name\":\"value\",\"type\":{\"type\":\"record\",\"name\":\"EmployeeInfo2\",\"fields\":[{\"name\":\"companyMap\",\"type\":[\"null\",{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"PairIntegerString\",\"fields\":[{\"name\":\"key\",\"type\":\"int\"},{\"name\":\"value\",\"type\":\"string\"}]},\"java-class\":\"java.util.HashMap\"}],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null}]}}]},\"java-class\":\"java.util.HashMap\"}],\"default\":null}]"; + * final Schema schema = JsonSchemaParser.parseInternal(schemaString); + * Assert.assertNotNull(schema); + * + * } + */ + + @Test + public void testUnionFieldType() { + String schemaString = "{\"type\": \"record\", \"name\": \"Lisp\", \"fields\": [{\"name\":\"value\", \"type\":[\"null\", \"string\",{\"type\": \"record\", \"name\": \"Cons\", \"fields\": [{\"name\":\"car\", \"type\":\"Lisp\"},{\"name\":\"cdr\", \"type\":\"Lisp\"}]}]}]}"; + final Schema schema = SchemaParser.parseSingle(schemaString); + Field value = schema.getField("value"); + Schema fieldSchema = value.schema(); + Schema subSchema = fieldSchema.getTypes().stream().filter((Schema s) -> s.getType() == Type.RECORD).findFirst() + .get(); + assertTrue(subSchema.hasFields()); + } + + @Test + public void parseAliases() throws JsonProcessingException { + String s1 = "{ \"aliases\" : [\"a1\", \"b1\"]}"; + ObjectMapper mapper = new ObjectMapper(); + JsonNode j1 = mapper.readTree(s1); + Set aliases = Schema.parseAliases(j1); + assertEquals(2, aliases.size()); + assertTrue(aliases.contains("a1")); + assertTrue(aliases.contains("b1")); + + String s2 = "{ \"aliases\" : {\"a1\": \"b1\"}}"; + JsonNode j2 = mapper.readTree(s2); + + SchemaParseException ex = assertThrows(SchemaParseException.class, () -> Schema.parseAliases(j2)); + assertTrue(ex.getMessage().contains("aliases not an array")); + + String s3 = "{ \"aliases\" : [11, \"b1\"]}"; + JsonNode j3 = mapper.readTree(s3); + SchemaParseException ex3 = assertThrows(SchemaParseException.class, () -> Schema.parseAliases(j3)); + assertTrue(ex3.getMessage().contains("alias not a string")); + } + + @Test + void testContentAfterAvsc() { + SchemaParser parser = new SchemaParser(); + assertThrows(SchemaParseException.class, () -> parser.parse("{\"type\": \"string\"}; DROP TABLE STUDENTS")); + } + + @Test + void testContentAfterAvscInInputStream() throws Exception { + // This test only works for the old parser, as the new parser first consumes + // the entire input stream. + Schema.Parser parser = new Schema.Parser(NameValidator.UTF_VALIDATOR); + parser.setValidateDefaults(true); + String avsc = "{\"type\": \"string\"}; DROP TABLE STUDENTS"; + ByteArrayInputStream is = new ByteArrayInputStream(avsc.getBytes(StandardCharsets.UTF_8)); + Schema schema = parser.parse(is); + assertNotNull(schema); + } + + @Test + void testContentAfterAvscInFile() throws Exception { + File avscFile = Files.createTempFile("testContentAfterAvscInFile", null).toFile(); + try (FileWriter writer = new FileWriter(avscFile)) { + writer.write("{\"type\": \"string\"}; DROP TABLE STUDENTS"); + writer.flush(); + } + + SchemaParser parser = new SchemaParser(); + assertThrows(SchemaParseException.class, () -> parser.parse(avscFile)); + } + + @Test + void testParseMultipleFile() throws IOException { + URL directory = requireNonNull(Thread.currentThread().getContextClassLoader().getResource("multipleFile")); + File f1 = new File(directory.getPath(), "ApplicationEvent.avsc"); + File f2 = new File(directory.getPath(), "DocumentInfo.avsc"); + File f3 = new File(directory.getPath(), "MyResponse.avsc"); + Assertions.assertTrue(f1.exists(), "File not exist for test " + f1.getPath()); + Assertions.assertTrue(f2.exists(), "File not exist for test " + f2.getPath()); + Assertions.assertTrue(f3.exists(), "File not exist for test " + f3.getPath()); + SchemaParser parser = new SchemaParser(); + parser.parse(f1); + parser.parse(f2); + parser.parse(f3); + final Map schemas = parser.getParsedNamedSchemas().stream() + .collect(Collectors.toMap(Schema::getName, Function.identity())); + Assertions.assertEquals(3, schemas.size()); + Schema schemaAppEvent = schemas.get("ApplicationEvent"); + Schema schemaDocInfo = schemas.get("DocumentInfo"); + Schema schemaResponse = schemas.get("MyResponse"); + Assertions.assertNotNull(schemaAppEvent); + Assertions.assertEquals(4, schemaAppEvent.getFields().size()); + Field documents = schemaAppEvent.getField("documents"); + Schema docSchema = documents.schema().getTypes().get(1).getElementType(); + Assertions.assertEquals(docSchema, schemaDocInfo); + Assertions.assertNotNull(schemaDocInfo); + Assertions.assertNotNull(schemaResponse); + } + + @Test + void add_types() { + String schemaRecord2 = "{\"type\":\"record\", \"name\":\"record2\", \"fields\": [" + + " {\"name\":\"f1\", \"type\":\"record1\" }" + "]}"; // register schema1 in schema. + Schema schemaRecord1 = Schema.createRecord("record1", "doc", "", false); + schemaRecord1.setFields(Collections.singletonList(new Field("name", Schema.create(Type.STRING)))); + SchemaParser parser = new SchemaParser(); + parser.parse(schemaRecord1.toString()); + + // parse schema for record2 that contains field for schema1. + final Schema schema = parser.parse(schemaRecord2).mainSchema(); + final Field f1 = schema.getField("f1"); + assertNotNull(f1); + assertEquals(schemaRecord1, f1.schema()); + } + + /** + * Tests the behavior of Schema.Parser if its validation option is set to + * `null`. This is then set to the default option `NO_VALIDATION`. + */ + @Test + void testParserNullValidate() { + new SchemaParser(null).parse("{\"type\":\"record\",\"name\":\"\",\"fields\":[]}"); // Empty name + } + + @Test + void disallowTypeObjectForNamedType() { + String withTypeObjectWithNamedType = "{" + + "\"namespace\":\"tests\",\"type\":\"record\",\"name\":\"Invalid\",\"fields\":[" + + "{\"name\":\"good\",\"type\":{\"type\":\"fixed\",\"name\":\"Hash\",\"size\":16}}," + + "{\"name\":\"right\",\"type\":{\"type\":\"Hash\"}}]}"; + assertThrows(SchemaParseException.class, () -> new SchemaParser().parse(withTypeObjectWithNamedType).mainSchema()); + } + + /** + * Tests when a user tries to write a record with an invalid enum symbol value + * that the exception returned is more descriptive than just a NPE or an + * incorrect mention of an unspecified non-null field. + */ + @Test + void enumWriteUnknownField() throws IOException { + Schema schema = Schema.createRecord("record1", "doc", "", false); + String goodValue = "HELLO"; + Schema enumSchema = Schema.createEnum("enum1", "doc", "", Arrays.asList(goodValue)); + Field field1 = new Field("field1", enumSchema); + schema.setFields(Collections.singletonList(field1)); + + GenericDatumWriter datumWriter = new GenericDatumWriter<>(schema); + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null); + GenericRecordBuilder builder = new GenericRecordBuilder(schema); + String badValue = "GOODBYE"; + builder.set(field1, new EnumSymbol(enumSchema, badValue)); + Record record = builder.build(); + try { + datumWriter.write(record, encoder); + fail("should have thrown"); + } catch (AvroTypeException ate) { + assertTrue(ate.getMessage().contains(goodValue)); + assertTrue(ate.getMessage().contains(badValue)); + } } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java index 77ee588b1e3..8d61eed301a 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java @@ -17,6 +17,8 @@ */ package org.apache.avro; +import static org.junit.jupiter.api.Assertions.*; + import com.fasterxml.jackson.databind.node.NullNode; import java.io.File; import java.io.IOException; @@ -36,93 +38,93 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecordBuilder; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestSchemaBuilder { - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); + @TempDir + public File DIR; @Test - public void testRecord() { + void record() { Schema schema = SchemaBuilder.record("myrecord").namespace("org.example").aliases("oldrecord").fields().name("f0") .aliases("f0alias").type().stringType().noDefault().name("f1").doc("This is f1").type().longType().noDefault() .name("f2").type().nullable().booleanType().booleanDefault(true).name("f3").type().unionOf().nullType().and() .booleanType().endUnion().nullDefault().endRecord(); - Assert.assertEquals("myrecord", schema.getName()); - Assert.assertEquals("org.example", schema.getNamespace()); - Assert.assertEquals("org.example.oldrecord", schema.getAliases().iterator().next()); - Assert.assertFalse(schema.isError()); + assertEquals("myrecord", schema.getName()); + assertEquals("org.example", schema.getNamespace()); + assertEquals("org.example.oldrecord", schema.getAliases().iterator().next()); + assertFalse(schema.isError()); List fields = schema.getFields(); - Assert.assertEquals(4, fields.size()); - Assert.assertEquals(new Schema.Field("f0", Schema.create(Schema.Type.STRING)), fields.get(0)); - Assert.assertTrue(fields.get(0).aliases().contains("f0alias")); - Assert.assertEquals(new Schema.Field("f1", Schema.create(Schema.Type.LONG), "This is f1"), fields.get(1)); + assertEquals(4, fields.size()); + assertEquals(new Schema.Field("f0", Schema.create(Schema.Type.STRING)), fields.get(0)); + assertTrue(fields.get(0).aliases().contains("f0alias")); + assertEquals(new Schema.Field("f1", Schema.create(Schema.Type.LONG), "This is f1"), fields.get(1)); List types = new ArrayList<>(); types.add(Schema.create(Schema.Type.BOOLEAN)); types.add(Schema.create(Schema.Type.NULL)); Schema optional = Schema.createUnion(types); - Assert.assertEquals(new Schema.Field("f2", optional, null, true), fields.get(2)); + assertEquals(new Schema.Field("f2", optional, null, true), fields.get(2)); List types2 = new ArrayList<>(); types2.add(Schema.create(Schema.Type.NULL)); types2.add(Schema.create(Schema.Type.BOOLEAN)); Schema optional2 = Schema.createUnion(types2); - Assert.assertNotEquals(new Schema.Field("f3", optional2, null, (Object) null), fields.get(3)); - Assert.assertEquals(new Schema.Field("f3", optional2, null, Schema.Field.NULL_DEFAULT_VALUE), fields.get(3)); + assertNotEquals(new Schema.Field("f3", optional2, null, (Object) null), fields.get(3)); + assertEquals(new Schema.Field("f3", optional2, null, Schema.Field.NULL_DEFAULT_VALUE), fields.get(3)); } @Test - public void testDoc() { + void doc() { Schema s = SchemaBuilder.fixed("myfixed").doc("mydoc").size(1); - Assert.assertEquals("mydoc", s.getDoc()); + assertEquals("mydoc", s.getDoc()); } @Test - public void testProps() { + void props() { Schema s = SchemaBuilder.builder().intBuilder().prop("p1", "v1").prop("p2", "v2").prop("p2", "v2real") // overwrite .endInt(); int size = s.getObjectProps().size(); - Assert.assertEquals(2, size); - Assert.assertEquals("v1", s.getProp("p1")); - Assert.assertEquals("v2real", s.getProp("p2")); + assertEquals(2, size); + assertEquals("v1", s.getProp("p1")); + assertEquals("v2real", s.getProp("p2")); } @Test - public void testObjectProps() { + void objectProps() { Schema s = SchemaBuilder.builder().intBuilder().prop("booleanProp", true).prop("intProp", Integer.MAX_VALUE) .prop("longProp", Long.MAX_VALUE).prop("floatProp", 1.0f).prop("doubleProp", Double.MAX_VALUE) .prop("byteProp", new byte[] { 0x41, 0x42, 0x43 }).prop("stringProp", "abc").endInt(); // object properties - Assert.assertEquals(7, s.getObjectProps().size()); - Assert.assertTrue(s.getObjectProp("booleanProp") instanceof Boolean); - Assert.assertEquals(true, s.getObjectProp("booleanProp")); - Assert.assertTrue(s.getObjectProp("intProp") instanceof Integer); - Assert.assertEquals(Integer.MAX_VALUE, s.getObjectProp("intProp")); - Assert.assertTrue(s.getObjectProp("intProp") instanceof Integer); - Assert.assertTrue(s.getObjectProp("longProp") instanceof Long); - Assert.assertEquals(Long.MAX_VALUE, s.getObjectProp("longProp")); - Assert.assertTrue(s.getObjectProp("floatProp") instanceof Double); + assertEquals(7, s.getObjectProps().size()); + assertTrue(s.getObjectProp("booleanProp") instanceof Boolean); + assertEquals(true, s.getObjectProp("booleanProp")); + assertTrue(s.getObjectProp("intProp") instanceof Integer); + assertEquals(Integer.MAX_VALUE, s.getObjectProp("intProp")); + assertTrue(s.getObjectProp("intProp") instanceof Integer); + assertTrue(s.getObjectProp("longProp") instanceof Long); + assertEquals(Long.MAX_VALUE, s.getObjectProp("longProp")); + assertTrue(s.getObjectProp("floatProp") instanceof Double); // float converts to double - Assert.assertEquals(1.0d, s.getObjectProp("floatProp")); - Assert.assertTrue(s.getObjectProp("doubleProp") instanceof Double); - Assert.assertEquals(Double.MAX_VALUE, s.getObjectProp("doubleProp")); + assertEquals(1.0d, s.getObjectProp("floatProp")); + assertTrue(s.getObjectProp("doubleProp") instanceof Double); + assertEquals(Double.MAX_VALUE, s.getObjectProp("doubleProp")); // byte[] converts to string - Assert.assertTrue(s.getObjectProp("byteProp") instanceof String); - Assert.assertEquals("ABC", s.getObjectProp("byteProp")); - Assert.assertTrue(s.getObjectProp("stringProp") instanceof String); - Assert.assertEquals("abc", s.getObjectProp("stringProp")); + assertTrue(s.getObjectProp("byteProp") instanceof String); + assertEquals("ABC", s.getObjectProp("byteProp")); + assertTrue(s.getObjectProp("stringProp") instanceof String); + assertEquals("abc", s.getObjectProp("stringProp")); } @Test - public void testFieldObjectProps() { + void fieldObjectProps() { Schema s = SchemaBuilder.builder().record("MyRecord").fields().name("myField").prop("booleanProp", true) .prop("intProp", Integer.MAX_VALUE).prop("longProp", Long.MAX_VALUE).prop("floatProp", 1.0f) .prop("doubleProp", Double.MAX_VALUE).prop("byteProp", new byte[] { 0x41, 0x42, 0x43 }) @@ -131,28 +133,31 @@ public void testFieldObjectProps() { Schema.Field f = s.getField("myField"); // object properties - Assert.assertEquals(7, f.getObjectProps().size()); - Assert.assertTrue(f.getObjectProp("booleanProp") instanceof Boolean); - Assert.assertEquals(true, f.getObjectProp("booleanProp")); - Assert.assertTrue(f.getObjectProp("intProp") instanceof Integer); - Assert.assertEquals(Integer.MAX_VALUE, f.getObjectProp("intProp")); - Assert.assertTrue(f.getObjectProp("intProp") instanceof Integer); - Assert.assertTrue(f.getObjectProp("longProp") instanceof Long); - Assert.assertEquals(Long.MAX_VALUE, f.getObjectProp("longProp")); - Assert.assertTrue(f.getObjectProp("floatProp") instanceof Double); + assertEquals(7, f.getObjectProps().size()); + assertTrue(f.getObjectProp("booleanProp") instanceof Boolean); + assertEquals(true, f.getObjectProp("booleanProp")); + assertTrue(f.getObjectProp("intProp") instanceof Integer); + assertEquals(Integer.MAX_VALUE, f.getObjectProp("intProp")); + assertTrue(f.getObjectProp("intProp") instanceof Integer); + assertTrue(f.getObjectProp("longProp") instanceof Long); + assertEquals(Long.MAX_VALUE, f.getObjectProp("longProp")); + assertTrue(f.getObjectProp("floatProp") instanceof Double); // float converts to double - Assert.assertEquals(1.0d, f.getObjectProp("floatProp")); - Assert.assertTrue(f.getObjectProp("doubleProp") instanceof Double); - Assert.assertEquals(Double.MAX_VALUE, f.getObjectProp("doubleProp")); + assertEquals(1.0d, f.getObjectProp("floatProp")); + assertTrue(f.getObjectProp("doubleProp") instanceof Double); + assertEquals(Double.MAX_VALUE, f.getObjectProp("doubleProp")); // byte[] converts to string - Assert.assertTrue(f.getObjectProp("byteProp") instanceof String); - Assert.assertEquals("ABC", f.getObjectProp("byteProp")); - Assert.assertTrue(f.getObjectProp("stringProp") instanceof String); - Assert.assertEquals("abc", f.getObjectProp("stringProp")); + assertTrue(f.getObjectProp("byteProp") instanceof String); + assertEquals("ABC", f.getObjectProp("byteProp")); + assertTrue(f.getObjectProp("stringProp") instanceof String); + assertEquals("abc", f.getObjectProp("stringProp")); + + assertEquals("abc", f.getObjectProp("stringProp", "default")); + assertEquals("default", f.getObjectProp("unknwon", "default")); } @Test - public void testArrayObjectProp() { + void arrayObjectProp() { List values = new ArrayList<>(); values.add(true); values.add(Integer.MAX_VALUE); @@ -165,26 +170,26 @@ public void testArrayObjectProp() { Schema s = SchemaBuilder.builder().intBuilder().prop("arrayProp", values).endInt(); // object properties - Assert.assertEquals(1, s.getObjectProps().size()); + assertEquals(1, s.getObjectProps().size()); - Assert.assertTrue(s.getObjectProp("arrayProp") instanceof Collection); + assertTrue(s.getObjectProp("arrayProp") instanceof Collection); @SuppressWarnings("unchecked") Collection valueCollection = (Collection) s.getObjectProp("arrayProp"); Iterator iter = valueCollection.iterator(); - Assert.assertEquals(7, valueCollection.size()); - Assert.assertEquals(true, iter.next()); - Assert.assertEquals(Integer.MAX_VALUE, iter.next()); - Assert.assertEquals(Long.MAX_VALUE, iter.next()); + assertEquals(7, valueCollection.size()); + assertEquals(true, iter.next()); + assertEquals(Integer.MAX_VALUE, iter.next()); + assertEquals(Long.MAX_VALUE, iter.next()); // float converts to double - Assert.assertEquals(1.0d, iter.next()); - Assert.assertEquals(Double.MAX_VALUE, iter.next()); + assertEquals(1.0d, iter.next()); + assertEquals(Double.MAX_VALUE, iter.next()); // byte[] converts to string - Assert.assertEquals("ABC", iter.next()); - Assert.assertEquals("abc", iter.next()); + assertEquals("ABC", iter.next()); + assertEquals("abc", iter.next()); } @Test - public void testFieldArrayObjectProp() { + void fieldArrayObjectProp() { List values = new ArrayList<>(); values.add(true); values.add(Integer.MAX_VALUE); @@ -200,26 +205,26 @@ public void testFieldArrayObjectProp() { Schema.Field f = s.getField("myField"); // object properties - Assert.assertEquals(1, f.getObjectProps().size()); + assertEquals(1, f.getObjectProps().size()); - Assert.assertTrue(f.getObjectProp("arrayProp") instanceof Collection); + assertTrue(f.getObjectProp("arrayProp") instanceof Collection); @SuppressWarnings("unchecked") Collection valueCollection = (Collection) f.getObjectProp("arrayProp"); Iterator iter = valueCollection.iterator(); - Assert.assertEquals(7, valueCollection.size()); - Assert.assertEquals(true, iter.next()); - Assert.assertEquals(Integer.MAX_VALUE, iter.next()); - Assert.assertEquals(Long.MAX_VALUE, iter.next()); + assertEquals(7, valueCollection.size()); + assertEquals(true, iter.next()); + assertEquals(Integer.MAX_VALUE, iter.next()); + assertEquals(Long.MAX_VALUE, iter.next()); // float converts to double - Assert.assertEquals(1.0d, iter.next()); - Assert.assertEquals(Double.MAX_VALUE, iter.next()); + assertEquals(1.0d, iter.next()); + assertEquals(Double.MAX_VALUE, iter.next()); // byte[] converts to string - Assert.assertEquals("ABC", iter.next()); - Assert.assertEquals("abc", iter.next()); + assertEquals("ABC", iter.next()); + assertEquals("abc", iter.next()); } @Test - public void testMapObjectProp() { + void mapObjectProp() { Map values = new HashMap<>(); values.put("booleanKey", true); values.put("intKey", Integer.MAX_VALUE); @@ -232,31 +237,31 @@ public void testMapObjectProp() { Schema s = SchemaBuilder.builder().intBuilder().prop("mapProp", values).endInt(); // object properties - Assert.assertTrue(s.getObjectProp("mapProp") instanceof Map); + assertTrue(s.getObjectProp("mapProp") instanceof Map); @SuppressWarnings("unchecked") Map valueMap = (Map) s.getObjectProp("mapProp"); - Assert.assertEquals(values.size(), valueMap.size()); - - Assert.assertTrue(valueMap.get("booleanKey") instanceof Boolean); - Assert.assertEquals(true, valueMap.get("booleanKey")); - Assert.assertTrue(valueMap.get("intKey") instanceof Integer); - Assert.assertEquals(Integer.MAX_VALUE, valueMap.get("intKey")); - Assert.assertTrue(valueMap.get("longKey") instanceof Long); - Assert.assertEquals(Long.MAX_VALUE, valueMap.get("longKey")); + assertEquals(values.size(), valueMap.size()); + + assertTrue(valueMap.get("booleanKey") instanceof Boolean); + assertEquals(true, valueMap.get("booleanKey")); + assertTrue(valueMap.get("intKey") instanceof Integer); + assertEquals(Integer.MAX_VALUE, valueMap.get("intKey")); + assertTrue(valueMap.get("longKey") instanceof Long); + assertEquals(Long.MAX_VALUE, valueMap.get("longKey")); // float converts to double - Assert.assertTrue(valueMap.get("floatKey") instanceof Double); - Assert.assertEquals(1.0d, valueMap.get("floatKey")); - Assert.assertTrue(valueMap.get("doubleKey") instanceof Double); - Assert.assertEquals(Double.MAX_VALUE, valueMap.get("doubleKey")); + assertTrue(valueMap.get("floatKey") instanceof Double); + assertEquals(1.0d, valueMap.get("floatKey")); + assertTrue(valueMap.get("doubleKey") instanceof Double); + assertEquals(Double.MAX_VALUE, valueMap.get("doubleKey")); // byte[] converts to string - Assert.assertTrue(valueMap.get("byteKey") instanceof String); - Assert.assertEquals("ABC", valueMap.get("byteKey")); - Assert.assertTrue(valueMap.get("stringKey") instanceof String); - Assert.assertEquals("abc", valueMap.get("stringKey")); + assertTrue(valueMap.get("byteKey") instanceof String); + assertEquals("ABC", valueMap.get("byteKey")); + assertTrue(valueMap.get("stringKey") instanceof String); + assertEquals("abc", valueMap.get("stringKey")); } @Test - public void testFieldMapObjectProp() { + void fieldMapObjectProp() { Map values = new HashMap<>(); values.put("booleanKey", true); values.put("intKey", Integer.MAX_VALUE); @@ -272,42 +277,46 @@ public void testFieldMapObjectProp() { Schema.Field f = s.getField("myField"); // object properties - Assert.assertTrue(f.getObjectProp("mapProp") instanceof Map); + assertTrue(f.getObjectProp("mapProp") instanceof Map); @SuppressWarnings("unchecked") Map valueMap = (Map) f.getObjectProp("mapProp"); - Assert.assertEquals(values.size(), valueMap.size()); - - Assert.assertTrue(valueMap.get("booleanKey") instanceof Boolean); - Assert.assertEquals(true, valueMap.get("booleanKey")); - Assert.assertTrue(valueMap.get("intKey") instanceof Integer); - Assert.assertEquals(Integer.MAX_VALUE, valueMap.get("intKey")); - Assert.assertTrue(valueMap.get("longKey") instanceof Long); - Assert.assertEquals(Long.MAX_VALUE, valueMap.get("longKey")); + assertEquals(values.size(), valueMap.size()); + + assertTrue(valueMap.get("booleanKey") instanceof Boolean); + assertEquals(true, valueMap.get("booleanKey")); + assertTrue(valueMap.get("intKey") instanceof Integer); + assertEquals(Integer.MAX_VALUE, valueMap.get("intKey")); + assertTrue(valueMap.get("longKey") instanceof Long); + assertEquals(Long.MAX_VALUE, valueMap.get("longKey")); // float converts to double - Assert.assertTrue(valueMap.get("floatKey") instanceof Double); - Assert.assertEquals(1.0d, valueMap.get("floatKey")); - Assert.assertTrue(valueMap.get("doubleKey") instanceof Double); - Assert.assertEquals(Double.MAX_VALUE, valueMap.get("doubleKey")); + assertTrue(valueMap.get("floatKey") instanceof Double); + assertEquals(1.0d, valueMap.get("floatKey")); + assertTrue(valueMap.get("doubleKey") instanceof Double); + assertEquals(Double.MAX_VALUE, valueMap.get("doubleKey")); // byte[] converts to string - Assert.assertTrue(valueMap.get("byteKey") instanceof String); - Assert.assertEquals("ABC", valueMap.get("byteKey")); - Assert.assertTrue(valueMap.get("stringKey") instanceof String); - Assert.assertEquals("abc", valueMap.get("stringKey")); + assertTrue(valueMap.get("byteKey") instanceof String); + assertEquals("ABC", valueMap.get("byteKey")); + assertTrue(valueMap.get("stringKey") instanceof String); + assertEquals("abc", valueMap.get("stringKey")); } - @Test(expected = AvroRuntimeException.class) - public void testNullObjectProp() { - SchemaBuilder.builder().intBuilder().prop("nullProp", (Object) null).endInt(); + @Test + void nullObjectProp() { + assertThrows(AvroRuntimeException.class, () -> { + SchemaBuilder.builder().intBuilder().prop("nullProp", (Object) null).endInt(); + }); } - @Test(expected = AvroRuntimeException.class) - public void testFieldNullObjectProp() { - SchemaBuilder.builder().record("MyRecord").fields().name("myField").prop("nullProp", (Object) null).type().intType() - .noDefault().endRecord(); + @Test + void fieldNullObjectProp() { + assertThrows(AvroRuntimeException.class, () -> { + SchemaBuilder.builder().record("MyRecord").fields().name("myField").prop("nullProp", (Object) null).type() + .intType().noDefault().endRecord(); + }); } @Test - public void testNamespaces() { + void namespaces() { Schema s1 = SchemaBuilder.record("myrecord").namespace("org.example").fields().name("myint").type().intType() .noDefault().endRecord(); Schema s2 = SchemaBuilder.record("org.example.myrecord").fields().name("myint").type().intType().noDefault() @@ -317,105 +326,107 @@ public void testNamespaces() { Schema s4 = SchemaBuilder.builder("org.example").record("myrecord").fields().name("myint").type().intType() .noDefault().endRecord(); - Assert.assertEquals("myrecord", s1.getName()); - Assert.assertEquals("myrecord", s2.getName()); - Assert.assertEquals("myrecord", s3.getName()); - Assert.assertEquals("myrecord", s4.getName()); + assertEquals("myrecord", s1.getName()); + assertEquals("myrecord", s2.getName()); + assertEquals("myrecord", s3.getName()); + assertEquals("myrecord", s4.getName()); - Assert.assertEquals("org.example", s1.getNamespace()); - Assert.assertEquals("org.example", s2.getNamespace()); - Assert.assertEquals("org.example", s3.getNamespace()); // namespace call is ignored - Assert.assertEquals("org.example", s4.getNamespace()); + assertEquals("org.example", s1.getNamespace()); + assertEquals("org.example", s2.getNamespace()); + assertEquals("org.example", s3.getNamespace()); // namespace call is ignored + assertEquals("org.example", s4.getNamespace()); - Assert.assertEquals("org.example.myrecord", s1.getFullName()); - Assert.assertEquals("org.example.myrecord", s2.getFullName()); - Assert.assertEquals("org.example.myrecord", s3.getFullName()); - Assert.assertEquals("org.example.myrecord", s4.getFullName()); + assertEquals("org.example.myrecord", s1.getFullName()); + assertEquals("org.example.myrecord", s2.getFullName()); + assertEquals("org.example.myrecord", s3.getFullName()); + assertEquals("org.example.myrecord", s4.getFullName()); } - @Test(expected = NullPointerException.class) - public void testMissingRecordName() { - SchemaBuilder.record(null).fields() // null name - .name("f0").type().stringType().noDefault().endRecord(); + @Test + void missingRecordName() { + assertThrows(NullPointerException.class, () -> { + SchemaBuilder.record(null).fields() // null name + .name("f0").type().stringType().noDefault().endRecord(); + }); } @Test - public void testBoolean() { + void testBoolean() { Schema.Type type = Schema.Type.BOOLEAN; Schema simple = SchemaBuilder.builder().booleanType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().booleanBuilder().prop("p", "v").endBoolean(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testInt() { + void testInt() { Schema.Type type = Schema.Type.INT; Schema simple = SchemaBuilder.builder().intType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().intBuilder().prop("p", "v").endInt(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testLong() { + void testLong() { Schema.Type type = Schema.Type.LONG; Schema simple = SchemaBuilder.builder().longType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().longBuilder().prop("p", "v").endLong(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testFloat() { + void testFloat() { Schema.Type type = Schema.Type.FLOAT; Schema simple = SchemaBuilder.builder().floatType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().floatBuilder().prop("p", "v").endFloat(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testDuble() { + void duble() { Schema.Type type = Schema.Type.DOUBLE; Schema simple = SchemaBuilder.builder().doubleType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().doubleBuilder().prop("p", "v").endDouble(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testString() { + void string() { Schema.Type type = Schema.Type.STRING; Schema simple = SchemaBuilder.builder().stringType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().stringBuilder().prop("p", "v").endString(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testBytes() { + void bytes() { Schema.Type type = Schema.Type.BYTES; Schema simple = SchemaBuilder.builder().bytesType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().bytesBuilder().prop("p", "v").endBytes(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testNull() { + void testNull() { Schema.Type type = Schema.Type.NULL; Schema simple = SchemaBuilder.builder().nullType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().nullBuilder().prop("p", "v").endNull(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } private Schema primitive(Schema.Type type, Schema bare) { // test creation of bare schema by name Schema bareByName = SchemaBuilder.builder().type(type.getName()); - Assert.assertEquals(Schema.create(type), bareByName); - Assert.assertEquals(bareByName, bare); + assertEquals(Schema.create(type), bareByName); + assertEquals(bareByName, bare); // return a schema with custom prop set Schema p = Schema.create(type); p.addProp("p", "v"); @@ -434,112 +445,112 @@ private Schema primitive(Schema.Type type, Schema bare) { // } @Test - public void testRecursiveRecord() { + void recursiveRecord() { Schema schema = SchemaBuilder.record("LongList").fields().name("value").type().longType().noDefault().name("next") .type().optional().type("LongList").endRecord(); - Assert.assertEquals("LongList", schema.getName()); + assertEquals("LongList", schema.getName()); List fields = schema.getFields(); - Assert.assertEquals(2, fields.size()); - Assert.assertEquals(new Schema.Field("value", Schema.create(Schema.Type.LONG), null), fields.get(0)); + assertEquals(2, fields.size()); + assertEquals(new Schema.Field("value", Schema.create(Schema.Type.LONG), null), fields.get(0)); - Assert.assertEquals(Schema.Type.UNION, fields.get(1).schema().getType()); + assertEquals(Schema.Type.UNION, fields.get(1).schema().getType()); - Assert.assertEquals(Schema.Type.NULL, fields.get(1).schema().getTypes().get(0).getType()); + assertEquals(Schema.Type.NULL, fields.get(1).schema().getTypes().get(0).getType()); Schema recordSchema = fields.get(1).schema().getTypes().get(1); - Assert.assertEquals(Schema.Type.RECORD, recordSchema.getType()); - Assert.assertEquals("LongList", recordSchema.getName()); - Assert.assertEquals(NullNode.getInstance(), fields.get(1).defaultValue()); + assertEquals(Schema.Type.RECORD, recordSchema.getType()); + assertEquals("LongList", recordSchema.getName()); + assertEquals(NullNode.getInstance(), fields.get(1).defaultValue()); } @Test - public void testEnum() { + void testEnum() { List symbols = Arrays.asList("a", "b"); Schema expected = Schema.createEnum("myenum", null, null, symbols); expected.addProp("p", "v"); Schema schema = SchemaBuilder.enumeration("myenum").prop("p", "v").symbols("a", "b"); - Assert.assertEquals(expected, schema); + assertEquals(expected, schema); } @Test - public void testEnumWithDefault() { + void enumWithDefault() { List symbols = Arrays.asList("a", "b"); String enumDefault = "a"; Schema expected = Schema.createEnum("myenum", null, null, symbols, enumDefault); expected.addProp("p", "v"); Schema schema = SchemaBuilder.enumeration("myenum").prop("p", "v").defaultSymbol(enumDefault).symbols("a", "b"); - Assert.assertEquals(expected, schema); + assertEquals(expected, schema); } @Test - public void testFixed() { + void fixed() { Schema expected = Schema.createFixed("myfixed", null, null, 16); expected.addAlias("myOldFixed"); Schema schema = SchemaBuilder.fixed("myfixed").aliases("myOldFixed").size(16); - Assert.assertEquals(expected, schema); + assertEquals(expected, schema); } @Test - public void testArray() { + void array() { Schema longSchema = Schema.create(Schema.Type.LONG); Schema expected = Schema.createArray(longSchema); Schema schema1 = SchemaBuilder.array().items().longType(); - Assert.assertEquals(expected, schema1); + assertEquals(expected, schema1); Schema schema2 = SchemaBuilder.array().items(longSchema); - Assert.assertEquals(expected, schema2); + assertEquals(expected, schema2); Schema schema3 = SchemaBuilder.array().prop("p", "v").items().type("long"); expected.addProp("p", "v"); - Assert.assertEquals(expected, schema3); + assertEquals(expected, schema3); } @Test - public void testMap() { + void map() { Schema intSchema = Schema.create(Schema.Type.INT); Schema expected = Schema.createMap(intSchema); Schema schema1 = SchemaBuilder.map().values().intType(); - Assert.assertEquals(expected, schema1); + assertEquals(expected, schema1); Schema schema2 = SchemaBuilder.map().values(intSchema); - Assert.assertEquals(expected, schema2); + assertEquals(expected, schema2); Schema schema3 = SchemaBuilder.map().prop("p", "v").values().type("int"); expected.addProp("p", "v"); - Assert.assertEquals(expected, schema3); + assertEquals(expected, schema3); } @Test - public void testUnionAndNullable() { + void unionAndNullable() { List types = new ArrayList<>(); types.add(Schema.create(Schema.Type.LONG)); types.add(Schema.create(Schema.Type.NULL)); Schema expected = Schema.createUnion(types); Schema schema = SchemaBuilder.unionOf().longType().and().nullType().endUnion(); - Assert.assertEquals(expected, schema); + assertEquals(expected, schema); schema = SchemaBuilder.nullable().longType(); - Assert.assertEquals(expected, schema); + assertEquals(expected, schema); } @Test - public void testFields() { + void fields() { Schema rec = SchemaBuilder.record("Rec").fields().name("documented").doc("documented").type().nullType().noDefault() .name("ascending").orderAscending().type().booleanType().noDefault().name("descending").orderDescending().type() .floatType().noDefault().name("ignored").orderIgnore().type().doubleType().noDefault().name("aliased") .aliases("anAlias").type().stringType().noDefault().endRecord(); - Assert.assertEquals("documented", rec.getField("documented").doc()); - Assert.assertEquals(Order.ASCENDING, rec.getField("ascending").order()); - Assert.assertEquals(Order.DESCENDING, rec.getField("descending").order()); - Assert.assertEquals(Order.IGNORE, rec.getField("ignored").order()); - Assert.assertTrue(rec.getField("aliased").aliases().contains("anAlias")); + assertEquals("documented", rec.getField("documented").doc()); + assertEquals(Order.ASCENDING, rec.getField("ascending").order()); + assertEquals(Order.DESCENDING, rec.getField("descending").order()); + assertEquals(Order.IGNORE, rec.getField("ignored").order()); + assertTrue(rec.getField("aliased").aliases().contains("anAlias")); } @Test - public void testFieldShortcuts() { + void fieldShortcuts() { Schema full = SchemaBuilder.record("Blah").fields().name("rbool").type().booleanType().noDefault().name("obool") .type().optional().booleanType().name("nbool").type().nullable().booleanType().booleanDefault(true).name("rint") .type().intType().noDefault().name("oint").type().optional().intType().name("nint").type().nullable().intType() @@ -560,11 +571,11 @@ public void testFieldShortcuts() { .nullableString("nstring", "def").requiredBytes("rbytes").optionalBytes("obytes") .nullableBytes("nbytes", new byte[] { 1, 2, 3 }).endRecord(); - Assert.assertEquals(full, shortcut); + assertEquals(full, shortcut); } @Test - public void testNames() { + void names() { // no contextual namespace Schema r = SchemaBuilder.record("Rec").fields().name("f0").type().fixed("org.foo.MyFixed").size(1).noDefault() .name("f1").type("org.foo.MyFixed").noDefault().name("f2").type("org.foo.MyFixed", "").noDefault().name("f3") @@ -580,7 +591,7 @@ public void testNames() { // context namespace Schema f = SchemaBuilder.builder("").fixed("Foo").size(1); - Assert.assertEquals(Schema.createFixed("Foo", null, null, 1), f); + assertEquals(Schema.createFixed("Foo", null, null, 1), f); // context namespace from record matches r = SchemaBuilder.record("Rec").namespace("org.foo").fields().name("f0").type().fixed("MyFixed").size(1).noDefault() @@ -625,27 +636,33 @@ public void testNames() { } private void checkField(Schema r, Schema expected, String name) { - Assert.assertEquals(expected, r.getField(name).schema()); + assertEquals(expected, r.getField(name).schema()); } - @Test(expected = SchemaParseException.class) - public void testNamesFailRedefined() { - SchemaBuilder.record("Rec").fields().name("f0").type().enumeration("MyEnum").symbols("A", "B").enumDefault("A") - .name("f1").type().enumeration("MyEnum").symbols("X", "Y").noDefault().endRecord(); + @Test + void namesFailRedefined() { + assertThrows(SchemaParseException.class, () -> { + SchemaBuilder.record("Rec").fields().name("f0").type().enumeration("MyEnum").symbols("A", "B").enumDefault("A") + .name("f1").type().enumeration("MyEnum").symbols("X", "Y").noDefault().endRecord(); + }); } - @Test(expected = SchemaParseException.class) - public void testNamesFailAbsent() { - SchemaBuilder.builder().type("notdefined"); + @Test + void namesFailAbsent() { + assertThrows(SchemaParseException.class, () -> { + SchemaBuilder.builder().type("notdefined"); + }); } - @Test(expected = AvroTypeException.class) - public void testNameReserved() { - SchemaBuilder.fixed("long").namespace("").size(1); + @Test + void nameReserved() { + assertThrows(AvroTypeException.class, () -> { + SchemaBuilder.fixed("long").namespace("").size(1); + }); } @Test - public void testFieldTypesAndDefaultValues() { + void fieldTypesAndDefaultValues() { byte[] bytedef = new byte[] { 3 }; ByteBuffer bufdef = ByteBuffer.wrap(bytedef); String strdef = "\u0003"; @@ -689,57 +706,59 @@ public void testFieldTypesAndDefaultValues() { GenericData.Record newRec = new GenericRecordBuilder(r).build(); - Assert.assertEquals(false, newRec.get("boolF")); - Assert.assertEquals(false, newRec.get("boolU")); - Assert.assertEquals(1, newRec.get("intF")); - Assert.assertEquals(1, newRec.get("intU")); - Assert.assertEquals(2L, newRec.get("longF")); - Assert.assertEquals(2L, newRec.get("longU")); - Assert.assertEquals(3f, newRec.get("floatF")); - Assert.assertEquals(3f, newRec.get("floatU")); - Assert.assertEquals(4d, newRec.get("doubleF")); - Assert.assertEquals(4d, newRec.get("doubleU")); - Assert.assertEquals("def", newRec.get("stringF").toString()); - Assert.assertEquals("def", newRec.get("stringU").toString()); - Assert.assertEquals(bufdef, newRec.get("bytesF1")); - Assert.assertEquals(bufdef, newRec.get("bytesF2")); - Assert.assertEquals(bufdef, newRec.get("bytesF3")); - Assert.assertEquals(bufdef, newRec.get("bytesU")); - Assert.assertNull(newRec.get("nullF")); - Assert.assertNull(newRec.get("nullU")); - Assert.assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF1")).bytes()); - Assert.assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF2")).bytes()); - Assert.assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF3")).bytes()); - Assert.assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedU")).bytes()); - Assert.assertEquals("S", newRec.get("enumF").toString()); - Assert.assertEquals("SS", newRec.get("enumU").toString()); + assertEquals(false, newRec.get("boolF")); + assertEquals(false, newRec.get("boolU")); + assertEquals(1, newRec.get("intF")); + assertEquals(1, newRec.get("intU")); + assertEquals(2L, newRec.get("longF")); + assertEquals(2L, newRec.get("longU")); + assertEquals(3f, newRec.get("floatF")); + assertEquals(3f, newRec.get("floatU")); + assertEquals(4d, newRec.get("doubleF")); + assertEquals(4d, newRec.get("doubleU")); + assertEquals("def", newRec.get("stringF").toString()); + assertEquals("def", newRec.get("stringU").toString()); + assertEquals(bufdef, newRec.get("bytesF1")); + assertEquals(bufdef, newRec.get("bytesF2")); + assertEquals(bufdef, newRec.get("bytesF3")); + assertEquals(bufdef, newRec.get("bytesU")); + assertNull(newRec.get("nullF")); + assertNull(newRec.get("nullU")); + assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF1")).bytes()); + assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF2")).bytes()); + assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF3")).bytes()); + assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedU")).bytes()); + assertEquals("S", newRec.get("enumF").toString()); + assertEquals("SS", newRec.get("enumU").toString()); @SuppressWarnings("unchecked") Map map = (Map) newRec.get("mapF"); - Assert.assertEquals(mapdef.size(), map.size()); + assertEquals(mapdef.size(), map.size()); for (Map.Entry e : map.entrySet()) { - Assert.assertEquals(mapdef.get(e.getKey().toString()), e.getValue().toString()); + assertEquals(mapdef.get(e.getKey().toString()), e.getValue().toString()); } - Assert.assertEquals(newRec.get("mapF"), newRec.get("mapU")); + assertEquals(newRec.get("mapF"), newRec.get("mapU")); @SuppressWarnings("unchecked") GenericData.Array arr = (GenericData.Array) newRec.get("arrayF"); - Assert.assertEquals(arrdef.size(), arr.size()); + assertEquals(arrdef.size(), arr.size()); for (CharSequence c : arr) { - Assert.assertTrue(arrdef.contains(c.toString())); + assertTrue(arrdef.contains(c.toString())); } - Assert.assertEquals(newRec.get("arrayF"), newRec.get("arrayU")); - Assert.assertEquals(recdef, newRec.get("recordF")); - Assert.assertEquals(recdef2, newRec.get("recordU")); - Assert.assertEquals("S", newRec.get("byName").toString()); + assertEquals(newRec.get("arrayF"), newRec.get("arrayU")); + assertEquals(recdef, newRec.get("recordF")); + assertEquals(recdef2, newRec.get("recordU")); + assertEquals("S", newRec.get("byName").toString()); } - @Test(expected = SchemaBuilderException.class) - public void testBadDefault() { - SchemaBuilder.record("r").fields().name("f").type(Schema.create(Schema.Type.INT)).withDefault(new Object()) - .endRecord(); + @Test + void badDefault() { + assertThrows(SchemaBuilderException.class, () -> { + SchemaBuilder.record("r").fields().name("f").type(Schema.create(Schema.Type.INT)).withDefault(new Object()) + .endRecord(); + }); } @Test - public void testUnionFieldBuild() { + void unionFieldBuild() { SchemaBuilder.record("r").fields().name("allUnion").type().unionOf().booleanType().and().intType().and().longType() .and().floatType().and().doubleType().and().stringType().and().bytesType().and().nullType().and().fixed("Fix") .size(1).and().enumeration("Enu").symbols("Q").and().array().items().intType().and().map().values().longType() @@ -748,27 +767,27 @@ public void testUnionFieldBuild() { } @Test - public void testDefaults() throws IOException { + void defaults() throws IOException { Schema writeSchema = SchemaBuilder.record("r").fields().name("requiredInt").type().intType().noDefault() .name("optionalInt").type().optional().intType().name("nullableIntWithDefault").type().nullable().intType() .intDefault(3).endRecord(); GenericData.Record rec1 = new GenericRecordBuilder(writeSchema).set("requiredInt", 1).build(); - Assert.assertEquals(1, rec1.get("requiredInt")); - Assert.assertEquals(null, rec1.get("optionalInt")); - Assert.assertEquals(3, rec1.get("nullableIntWithDefault")); + assertEquals(1, rec1.get("requiredInt")); + assertNull(rec1.get("optionalInt")); + assertEquals(3, rec1.get("nullableIntWithDefault")); GenericData.Record rec2 = new GenericRecordBuilder(writeSchema).set("requiredInt", 1).set("optionalInt", 2) .set("nullableIntWithDefault", 13).build(); - Assert.assertEquals(1, rec2.get("requiredInt")); - Assert.assertEquals(2, rec2.get("optionalInt")); - Assert.assertEquals(13, rec2.get("nullableIntWithDefault")); + assertEquals(1, rec2.get("requiredInt")); + assertEquals(2, rec2.get("optionalInt")); + assertEquals(13, rec2.get("nullableIntWithDefault")); // write to file - File file = new File(DIR.getRoot().getPath(), "testDefaults.avro"); + File file = new File(DIR.getPath(), "testDefaults.avro"); try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { writer.create(writeSchema, file); @@ -785,24 +804,24 @@ public void testDefaults() throws IOException { new GenericDatumReader<>(writeSchema, readSchema))) { GenericData.Record rec1read = reader.iterator().next(); - Assert.assertEquals(1, rec1read.get("requiredInt")); - Assert.assertNull(rec1read.get("optionalInt")); - Assert.assertEquals(3, rec1read.get("nullableIntWithDefault")); - Assert.assertNull(rec1read.get("newOptionalInt")); - Assert.assertEquals(5, rec1read.get("newNullableIntWithDefault")); + assertEquals(1, rec1read.get("requiredInt")); + assertNull(rec1read.get("optionalInt")); + assertEquals(3, rec1read.get("nullableIntWithDefault")); + assertNull(rec1read.get("newOptionalInt")); + assertEquals(5, rec1read.get("newNullableIntWithDefault")); GenericData.Record rec2read = reader.iterator().next(); - Assert.assertEquals(1, rec2read.get("requiredInt")); - Assert.assertEquals(2, rec2read.get("optionalInt")); - Assert.assertEquals(13, rec2read.get("nullableIntWithDefault")); - Assert.assertNull(rec2read.get("newOptionalInt")); - Assert.assertEquals(5, rec2read.get("newNullableIntWithDefault")); + assertEquals(1, rec2read.get("requiredInt")); + assertEquals(2, rec2read.get("optionalInt")); + assertEquals(13, rec2read.get("nullableIntWithDefault")); + assertNull(rec2read.get("newOptionalInt")); + assertEquals(5, rec2read.get("newNullableIntWithDefault")); } } @Test - public void testDefaultTypes() { + void defaultTypes() { Integer intDef = 1; Long longDef = 2L; Float floatDef = 3F; @@ -811,34 +830,71 @@ public void testDefaultTypes() { .type().longType().longDefault(longDef).name("float").type().floatType().floatDefault(floatDef).name("double") .type().doubleType().doubleDefault(doubleDef).endRecord(); - Assert.assertEquals("int field default type or value mismatch", intDef, schema.getField("int").defaultVal()); - Assert.assertEquals("long field default type or value mismatch", longDef, schema.getField("long").defaultVal()); - Assert.assertEquals("float field default type or value mismatch", floatDef, schema.getField("float").defaultVal()); - Assert.assertEquals("double field default type or value mismatch", doubleDef, - schema.getField("double").defaultVal()); + assertEquals(intDef, schema.getField("int").defaultVal(), "int field default type or value mismatch"); + assertEquals(longDef, schema.getField("long").defaultVal(), "long field default type or value mismatch"); + assertEquals(floatDef, schema.getField("float").defaultVal(), "float field default type or value mismatch"); + assertEquals(doubleDef, schema.getField("double").defaultVal(), "double field default type or value mismatch"); } - @Test(expected = AvroRuntimeException.class) - public void testValidateDefaultsEnabled() { - try { - SchemaBuilder.record("ValidationRecord").fields().name("IntegerField").type("int").withDefault("Invalid") - .endRecord(); - } catch (AvroRuntimeException e) { - Assert.assertEquals("Default behavior is to raise an exception due to record having an invalid default", - "Invalid default for field IntegerField: \"Invalid\" not a \"int\"", e.getMessage()); - throw e; - } + @Test + void validateDefaultsEnabled() { + assertThrows(AvroRuntimeException.class, () -> { + try { + SchemaBuilder.record("ValidationRecord").fields().name("IntegerField").type("int").withDefault("Invalid") + .endRecord(); + } catch (AvroRuntimeException e) { + assertEquals("Invalid default for field IntegerField: \"Invalid\" not a \"int\"", e.getMessage(), + "Default behavior is to raise an exception due to record having an invalid default"); + throw e; + } + }); } @Test - public void testValidateDefaultsDisabled() { + void validateDefaultsDisabled() { final String fieldName = "IntegerField"; final String defaultValue = "foo"; Schema schema = SchemaBuilder.record("ValidationRecord").fields().name(fieldName).notValidatingDefaults() .type("int").withDefault(defaultValue) // Would throw an exception on endRecord() if validations enabled .endRecord(); - Assert.assertNull("Differing types, so this returns null", schema.getField(fieldName).defaultVal()); - Assert.assertEquals("Schema is able to be successfully created as is without validation", defaultValue, - schema.getField(fieldName).defaultValue().asText()); + assertNull(schema.getField(fieldName).defaultVal(), "Differing types, so this returns null"); + assertEquals(defaultValue, schema.getField(fieldName).defaultValue().asText(), + "Schema is able to be successfully created as is without validation"); + } + + /** + * https://issues.apache.org/jira/browse/AVRO-1965 + */ + @Test + void namespaceDefaulting() { + Schema d = SchemaBuilder.builder().intType(); + Schema c = SchemaBuilder.record("c").fields().name("d").type(d).noDefault().endRecord(); + Schema b = SchemaBuilder.record("b").fields().name("c").type(c).noDefault().endRecord(); + + Schema a1 = SchemaBuilder.record("default.a").fields().name("b").type(b).noDefault().endRecord(); + Schema a2 = SchemaParser.parseSingle(a1.toString()); + + assertEquals(a2, a1); + } + + @Test + void namesAcceptAll() throws InterruptedException { + // Ensure that Schema.setNameValidator won't interfere with others unit tests. + Runnable r = () -> { + Schema.setNameValidator(NameValidator.NO_VALIDATION); + final Schema schema = SchemaBuilder.record("7name").fields().name("123").type(Schema.create(Schema.Type.INT)) + .noDefault().endRecord(); + Assertions.assertNotNull(schema); + Assertions.assertEquals("7name", schema.getName()); + final Schema.Field field = schema.getField("123"); + Assertions.assertEquals("123", field.name()); + }; + + final Throwable[] exception = new Throwable[] { null }; + Thread t = new Thread(r); + t.setUncaughtExceptionHandler((Thread th, Throwable e) -> exception[0] = e); + t.start(); + t.join(); + Assertions.assertNull(exception[0], () -> exception[0].getMessage()); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java new file mode 100644 index 00000000000..05890846858 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.stream.Stream; + +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumWriter; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestSchemaCommons { + private static final Logger LOG = LoggerFactory.getLogger(TestSchemaCommons.class); + + @ParameterizedTest + @MethodSource("sharedFolders") + void runFolder(final File folder) throws IOException { + final File schemaSource = new File(folder, "schema.json"); + final File data = new File(folder, "data.avro"); + + if (!schemaSource.exists()) { + LOG.warn("No 'schema.json' file on folder {}", folder.getPath()); + return; + } + final Schema schema = new SchemaParser().parse(schemaSource).mainSchema(); + assertNotNull(schema); + + if (!data.exists()) { + LOG.warn("No 'data.avro' file on folder {}", folder.getPath()); + return; + } + + // output file + final String rootTest = Thread.currentThread().getContextClassLoader().getResource(".").getPath(); + final File copyData = new File(rootTest, "copy.avro"); + + // Deserialize from disk + DatumWriter datumWriter = new GenericDatumWriter<>(schema); + GenericDatumReader datumReader = new GenericDatumReader<>(schema); + try (DataFileReader dataFileReader = new DataFileReader<>(data, datumReader); + DataFileWriter dataFileWriter = new DataFileWriter<>(datumWriter)) { + dataFileWriter.create(schema, copyData); + GenericRecord record = null; + int counter = 0; + while (dataFileReader.hasNext()) { + record = dataFileReader.next(); + counter++; + assertNotNull(record); + dataFileWriter.append(record); + } + assertTrue(counter > 0, "no data in file"); + } + + // Cleanup + assertTrue(copyData.delete()); + } + + public static Stream sharedFolders() { + File root = new File("target/test-classes/share/test/data/schemas"); + return Arrays.stream(root.listFiles(File::isDirectory)).map(Arguments::of); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java index 27d47d221c4..12531e9e226 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java @@ -39,6 +39,9 @@ import static org.apache.avro.TestSchemas.EMPTY_UNION_SCHEMA; import static org.apache.avro.TestSchemas.ENUM1_ABC_SCHEMA; import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_DEFAULT; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_NAMESPACE_1; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_NAMESPACE_2; import static org.apache.avro.TestSchemas.ENUM1_BC_SCHEMA; import static org.apache.avro.TestSchemas.ENUM_ABC_ENUM_DEFAULT_A_RECORD; import static org.apache.avro.TestSchemas.ENUM_ABC_ENUM_DEFAULT_A_SCHEMA; @@ -65,6 +68,8 @@ import static org.apache.avro.TestSchemas.LONG_UNION_SCHEMA; import static org.apache.avro.TestSchemas.NS_RECORD1; import static org.apache.avro.TestSchemas.NS_RECORD2; +import static org.apache.avro.TestSchemas.WITH_NS; +import static org.apache.avro.TestSchemas.WITHOUT_NS; import static org.apache.avro.TestSchemas.NULL_SCHEMA; import static org.apache.avro.TestSchemas.ReaderWriter; import static org.apache.avro.TestSchemas.STRING_ARRAY_SCHEMA; @@ -73,9 +78,7 @@ import static org.apache.avro.TestSchemas.STRING_UNION_SCHEMA; import static org.apache.avro.TestSchemas.assertSchemaContains; import static org.apache.avro.TestSchemas.list; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.*; import java.io.BufferedReader; import java.io.ByteArrayOutputStream; @@ -100,7 +103,7 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -115,9 +118,9 @@ public class TestSchemaCompatibility { new Schema.Field("oldfield1", INT_SCHEMA, null, null), new Schema.Field("oldfield2", STRING_SCHEMA, null, null))); @Test - public void testValidateSchemaPairMissingField() { + void validateSchemaPairMissingField() { final List readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA, SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE); @@ -127,9 +130,9 @@ public void testValidateSchemaPairMissingField() { } @Test - public void testValidateSchemaPairMissingSecondField() { + void validateSchemaPairMissingSecondField() { final List readerFields = list(new Schema.Field("oldfield2", STRING_SCHEMA, null, null)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA, SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE); @@ -139,10 +142,10 @@ public void testValidateSchemaPairMissingSecondField() { } @Test - public void testValidateSchemaPairAllFields() { + void validateSchemaPairAllFields() { final List readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null), new Schema.Field("oldfield2", STRING_SCHEMA, null, null)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA, SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE); @@ -152,10 +155,10 @@ public void testValidateSchemaPairAllFields() { } @Test - public void testValidateSchemaNewFieldWithDefault() { + void validateSchemaNewFieldWithDefault() { final List readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null), new Schema.Field("newfield1", INT_SCHEMA, null, 42)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA, SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE); @@ -165,10 +168,10 @@ public void testValidateSchemaNewFieldWithDefault() { } @Test - public void testValidateSchemaNewField() { + void validateSchemaNewField() { final List readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null), new Schema.Field("newfield1", INT_SCHEMA, null, null)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); SchemaPairCompatibility compatibility = checkReaderWriterCompatibility(reader, WRITER_SCHEMA); // Test new field without default value. @@ -184,7 +187,7 @@ public void testValidateSchemaNewField() { } @Test - public void testValidateArrayWriterSchema() { + void validateArrayWriterSchema() { final Schema validReader = Schema.createArray(STRING_SCHEMA); final Schema invalidReader = Schema.createMap(STRING_SCHEMA); final SchemaCompatibility.SchemaPairCompatibility validResult = new SchemaCompatibility.SchemaPairCompatibility( @@ -204,7 +207,7 @@ public void testValidateArrayWriterSchema() { } @Test - public void testValidatePrimitiveWriterSchema() { + void validatePrimitiveWriterSchema() { final Schema validReader = Schema.create(Schema.Type.STRING); final SchemaCompatibility.SchemaPairCompatibility validResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), validReader, STRING_SCHEMA, @@ -225,11 +228,27 @@ public void testValidatePrimitiveWriterSchema() { * Reader union schema must contain all writer union branches. */ @Test - public void testUnionReaderWriterSubsetIncompatibility() { + void unionReaderWriterSubsetIncompatibility() { final Schema unionWriter = Schema.createUnion(list(INT_SCHEMA, STRING_SCHEMA, LONG_SCHEMA)); final Schema unionReader = Schema.createUnion(list(INT_SCHEMA, STRING_SCHEMA)); final SchemaPairCompatibility result = checkReaderWriterCompatibility(unionReader, unionWriter); assertEquals(SchemaCompatibilityType.INCOMPATIBLE, result.getType()); + assertEquals("/2", result.getResult().getIncompatibilities().get(0).getLocation()); + } + + @Test + void unionWriterSimpleReaderIncompatibility() { + Schema mandatorySchema = SchemaBuilder.record("Account").fields().name("age").type().intType().noDefault() + .endRecord(); + Schema optionalSchema = SchemaBuilder.record("Account").fields().optionalInt("age").endRecord(); + + SchemaPairCompatibility compatibility = checkReaderWriterCompatibility(mandatorySchema, optionalSchema); + + assertEquals(SchemaCompatibilityType.INCOMPATIBLE, compatibility.getType()); + + Incompatibility incompatibility = compatibility.getResult().getIncompatibilities().get(0); + assertEquals("reader type: INT not compatible with writer type: NULL", incompatibility.getMessage()); + assertEquals("/fields/0/type/0", incompatibility.getLocation()); } // ----------------------------------------------------------------------------------------------- @@ -259,6 +278,10 @@ public void testUnionReaderWriterSubsetIncompatibility() { new ReaderWriter(INT_MAP_SCHEMA, INT_MAP_SCHEMA), new ReaderWriter(LONG_MAP_SCHEMA, INT_MAP_SCHEMA), new ReaderWriter(ENUM1_AB_SCHEMA, ENUM1_AB_SCHEMA), new ReaderWriter(ENUM1_ABC_SCHEMA, ENUM1_AB_SCHEMA), + new ReaderWriter(ENUM1_AB_SCHEMA_DEFAULT, ENUM1_ABC_SCHEMA), + new ReaderWriter(ENUM1_AB_SCHEMA, ENUM1_AB_SCHEMA_NAMESPACE_1), + new ReaderWriter(ENUM1_AB_SCHEMA_NAMESPACE_1, ENUM1_AB_SCHEMA), + new ReaderWriter(ENUM1_AB_SCHEMA_NAMESPACE_1, ENUM1_AB_SCHEMA_NAMESPACE_2), // String-to/from-bytes, introduced in Avro 1.7.7 new ReaderWriter(STRING_SCHEMA, BYTES_SCHEMA), new ReaderWriter(BYTES_SCHEMA, STRING_SCHEMA), @@ -315,7 +338,7 @@ public void testUnionReaderWriterSubsetIncompatibility() { // This is comparing two records that have an inner array of records with // different namespaces. - new ReaderWriter(NS_RECORD1, NS_RECORD2)); + new ReaderWriter(NS_RECORD1, NS_RECORD2), new ReaderWriter(WITHOUT_NS, WITH_NS)); // ----------------------------------------------------------------------------------------------- @@ -362,14 +385,14 @@ public static void validateIncompatibleSchemas(Schema reader, Schema writer, * Tests reader/writer compatibility validation. */ @Test - public void testReaderWriterCompatibility() { + void readerWriterCompatibility() { for (ReaderWriter readerWriter : COMPATIBLE_READER_WRITER_TEST_CASES) { final Schema reader = readerWriter.getReader(); final Schema writer = readerWriter.getWriter(); LOG.debug("Testing compatibility of reader {} with writer {}.", reader, writer); final SchemaPairCompatibility result = checkReaderWriterCompatibility(reader, writer); - assertEquals(String.format("Expecting reader %s to be compatible with writer %s, but tested incompatible.", - reader, writer), SchemaCompatibilityType.COMPATIBLE, result.getType()); + assertEquals(SchemaCompatibilityType.COMPATIBLE, result.getType(), String + .format("Expecting reader %s to be compatible with writer %s, but tested incompatible.", reader, writer)); } } @@ -460,7 +483,7 @@ ENUM_AB_ENUM_DEFAULT_A_SCHEMA, new EnumSymbol(ENUM_AB_ENUM_DEFAULT_A_SCHEMA, "A" * Tests the reader/writer compatibility at decoding time. */ @Test - public void testReaderWriterDecodingCompatibility() throws Exception { + void readerWriterDecodingCompatibility() throws Exception { for (DecodingTestCase testCase : DECODING_COMPATIBILITY_TEST_CASES) { final Schema readerSchema = testCase.getReaderSchema(); final Schema writerSchema = testCase.getWriterSchema(); @@ -483,10 +506,11 @@ public void testReaderWriterDecodingCompatibility() throws Exception { final DatumReader datumReader = new GenericDatumReader<>(readerSchema); final Object decodedDatum = datumReader.read(null, decoder); - assertEquals(String.format( - "Expecting decoded value %s when decoding value %s whose writer schema is %s " - + "using reader schema %s, but value was %s.", - expectedDecodedDatum, datum, writerSchema, readerSchema, decodedDatum), expectedDecodedDatum, decodedDatum); + assertEquals(expectedDecodedDatum, decodedDatum, + String.format( + "Expecting decoded value %s when decoding value %s whose writer schema is %s " + + "using reader schema %s, but value was %s.", + expectedDecodedDatum, datum, writerSchema, readerSchema, decodedDatum)); } } @@ -495,12 +519,12 @@ private Schema readSchemaFromResources(String name) throws IOException { final String result = new BufferedReader(new InputStreamReader(inputStream)).lines() .collect(Collectors.joining("\n")); - return new Schema.Parser().parse(result); + return SchemaParser.parseSingle(result); } } @Test - public void checkResolvingDecoder() throws IOException { + void checkResolvingDecoder() throws IOException { final Schema locationSchema = readSchemaFromResources("schema-location.json"); final Schema writeSchema = readSchemaFromResources("schema-location-write.json"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityEnumDefaults.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityEnumDefaults.java index 33fc5ce3d33..44d468c7dea 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityEnumDefaults.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityEnumDefaults.java @@ -22,9 +22,13 @@ import static org.apache.avro.TestSchemas.ENUM2_AB_SCHEMA; import static org.apache.avro.TestSchemas.ENUM_ABC_ENUM_DEFAULT_A_SCHEMA; import static org.apache.avro.TestSchemas.ENUM_AB_ENUM_DEFAULT_A_SCHEMA; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import java.io.ByteArrayOutputStream; +import java.util.concurrent.Callable; + import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; @@ -35,18 +39,12 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; +import org.junit.jupiter.api.Test; public class TestSchemaCompatibilityEnumDefaults { - @Rule - public ExpectedException expectedException = ExpectedException.none(); @Test - public void testEnumDefaultNotAppliedWhenWriterFieldMissing() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found Record1, expecting Record1, missing required field field1"); + void enumDefaultNotAppliedWhenWriterFieldMissing() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields().name("field2").type(ENUM2_AB_SCHEMA).noDefault() .endRecord(); @@ -56,11 +54,14 @@ public void testEnumDefaultNotAppliedWhenWriterFieldMissing() throws Exception { GenericRecord datum = new GenericData.Record(writerSchema); datum.put("field2", new GenericData.EnumSymbol(writerSchema, "B")); - serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema); + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema)); + assertTrue( + avroTypeException.getMessage().contains("Found Record1, expecting Record1, missing required field field1")); } @Test - public void testEnumDefaultAppliedWhenNoFieldDefaultDefined() throws Exception { + void enumDefaultAppliedWhenNoFieldDefaultDefined() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields().name("field1").type(ENUM_ABC_ENUM_DEFAULT_A_SCHEMA) .noDefault().endRecord(); @@ -75,7 +76,7 @@ public void testEnumDefaultAppliedWhenNoFieldDefaultDefined() throws Exception { } @Test - public void testEnumDefaultNotAppliedWhenCompatibleSymbolIsFound() throws Exception { + void enumDefaultNotAppliedWhenCompatibleSymbolIsFound() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields().name("field1").type(ENUM_ABC_ENUM_DEFAULT_A_SCHEMA) .noDefault().endRecord(); @@ -89,7 +90,7 @@ public void testEnumDefaultNotAppliedWhenCompatibleSymbolIsFound() throws Except } @Test - public void testEnumDefaultAppliedWhenFieldDefaultDefined() throws Exception { + void enumDefaultAppliedWhenFieldDefaultDefined() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields().name("field1").type(ENUM_ABC_ENUM_DEFAULT_A_SCHEMA) .noDefault().endRecord(); @@ -104,10 +105,7 @@ public void testEnumDefaultAppliedWhenFieldDefaultDefined() throws Exception { } @Test - public void testFieldDefaultNotAppliedForUnknownSymbol() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("No match for C"); - + void fieldDefaultNotAppliedForUnknownSymbol() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields().name("field1").type(ENUM1_ABC_SCHEMA).noDefault() .endRecord(); Schema readerSchema = SchemaBuilder.record("Record1").fields().name("field1").type(ENUM1_AB_SCHEMA).withDefault("A") @@ -115,7 +113,9 @@ public void testFieldDefaultNotAppliedForUnknownSymbol() throws Exception { GenericRecord datum = new GenericData.Record(writerSchema); datum.put("field1", new GenericData.EnumSymbol(writerSchema, "C")); - serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema); + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema)); + assertEquals("Field \"field1\" content mismatch: No match for C", avroTypeException.getMessage()); } private GenericRecord serializeWithWriterThenDeserializeWithReader(Schema writerSchema, GenericRecord datum, @@ -129,8 +129,9 @@ private GenericRecord serializeWithWriterThenDeserializeWithReader(Schema writer byte[] bytes = baos.toByteArray(); Decoder decoder = DecoderFactory.get().resolvingDecoder(writerSchema, readerSchema, DecoderFactory.get().binaryDecoder(bytes, null)); - DatumReader datumReader = new GenericDatumReader<>(readerSchema); + GenericData data = new GenericData(); + data.setFastReaderEnabled(false); + DatumReader datumReader = new GenericDatumReader<>(readerSchema, readerSchema, data); return (GenericRecord) datumReader.read(null, decoder); } - } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java index 6ac3c68dc03..05321527cb4 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java @@ -17,44 +17,34 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_DINT_B_DFIXED_4_BYTES_RECORD1; +import static org.apache.avro.TestSchemas.A_DINT_B_DFIXED_8_BYTES_RECORD1; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; +import static org.apache.avro.TestSchemas.FIXED_8_BYTES; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityFixedSizeMismatch { - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { FIXED_4_BYTES, FIXED_8_BYTES, "expected: 8, found: 4", "/size" }, - { FIXED_8_BYTES, FIXED_4_BYTES, "expected: 4, found: 8", "/size" }, - { A_DINT_B_DFIXED_8_BYTES_RECORD1, A_DINT_B_DFIXED_4_BYTES_RECORD1, "expected: 4, found: 8", - "/fields/1/type/size" }, - { A_DINT_B_DFIXED_4_BYTES_RECORD1, A_DINT_B_DFIXED_8_BYTES_RECORD1, "expected: 8, found: 4", - "/fields/1/type/size" }, }; - return Arrays.asList(fields); + public static Stream data() { + return Stream.of(Arguments.of(FIXED_4_BYTES, FIXED_8_BYTES, "expected: 8, found: 4", "/size"), + Arguments.of(FIXED_8_BYTES, FIXED_4_BYTES, "expected: 4, found: 8", "/size"), + Arguments.of(A_DINT_B_DFIXED_8_BYTES_RECORD1, A_DINT_B_DFIXED_4_BYTES_RECORD1, "expected: 4, found: 8", + "/fields/1/type/size"), + Arguments.of(A_DINT_B_DFIXED_4_BYTES_RECORD1, A_DINT_B_DFIXED_8_BYTES_RECORD1, "expected: 8, found: 4", + "/fields/1/type/size")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testFixedSizeMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + void fixedSizeMismatchSchemas(Schema reader, Schema writer, String details, String location) { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.FIXED_SIZE_MISMATCH, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java index 82b70fe2443..63d607cd596 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java @@ -17,19 +17,19 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.ENUM1_ABC_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_BC_SCHEMA; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityMissingEnumSymbols { private static final Schema RECORD1_WITH_ENUM_AB = SchemaBuilder.record("Record1").fields() // @@ -39,26 +39,15 @@ public class TestSchemaCompatibilityMissingEnumSymbols { .name("field1").type(ENUM1_ABC_SCHEMA).noDefault() // .endRecord(); - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA, "[C]", "/symbols" }, - { ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA, "[A]", "/symbols" }, - { RECORD1_WITH_ENUM_AB, RECORD1_WITH_ENUM_ABC, "[C]", "/fields/0/type/symbols" } }; - return Arrays.asList(fields); + public static Stream data() { + return Stream.of(Arguments.of(ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA, "[C]", "/symbols"), + Arguments.of(ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA, "[A]", "/symbols"), + Arguments.of(RECORD1_WITH_ENUM_AB, RECORD1_WITH_ENUM_ABC, "[C]", "/fields/0/type/symbols")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testTypeMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testTypeMismatchSchemas(Schema reader, Schema writer, String details, String location) { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.MISSING_ENUM_SYMBOLS, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java index 4f947690009..3e84a5337c9 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java @@ -17,22 +17,40 @@ */ package org.apache.avro; -import static java.util.Arrays.asList; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; -@RunWith(Parameterized.class) +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; + +import static java.util.Arrays.asList; +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_DINT_B_DINT_STRING_UNION_RECORD1; +import static org.apache.avro.TestSchemas.A_DINT_B_DINT_UNION_RECORD1; +import static org.apache.avro.TestSchemas.BOOLEAN_SCHEMA; +import static org.apache.avro.TestSchemas.BYTES_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.DOUBLE_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; +import static org.apache.avro.TestSchemas.FLOAT_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_ARRAY_SCHEMA; +import static org.apache.avro.TestSchemas.INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_MAP_SCHEMA; +import static org.apache.avro.TestSchemas.INT_SCHEMA; +import static org.apache.avro.TestSchemas.INT_STRING_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.NULL_SCHEMA; +import static org.apache.avro.TestSchemas.STRING_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.list; + public class TestSchemaCompatibilityMissingUnionBranch { private static final Schema RECORD1_WITH_INT = SchemaBuilder.record("Record1").fields() // @@ -50,61 +68,52 @@ public class TestSchemaCompatibilityMissingUnionBranch { private static final Schema UNION_INT_MAP_INT = Schema.createUnion(list(INT_SCHEMA, INT_MAP_SCHEMA)); private static final Schema UNION_INT_NULL = Schema.createUnion(list(INT_SCHEMA, NULL_SCHEMA)); - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { INT_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, - Collections.singletonList("reader union lacking writer type: STRING"), Collections.singletonList("/1") }, - { STRING_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, - Collections.singletonList("reader union lacking writer type: INT"), Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, UNION_INT_RECORD1, Collections.singletonList("reader union lacking writer type: RECORD"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_RECORD2, Collections.singletonList("reader union lacking writer type: RECORD"), - Collections.singletonList("/1") }, + public static Stream data() { + return Stream.of( // + Arguments.of(INT_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: STRING"), Collections.singletonList("/1")), + Arguments.of(STRING_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: INT"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_RECORD1, + Collections.singletonList("reader union lacking writer type: RECORD"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_RECORD2, + Collections.singletonList("reader union lacking writer type: RECORD"), Collections.singletonList("/1")), // more info in the subset schemas - { UNION_INT_RECORD1, UNION_INT_RECORD2, Collections.singletonList("reader union lacking writer type: RECORD"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_ENUM1_AB, Collections.singletonList("reader union lacking writer type: ENUM"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_FIXED_4_BYTES, - Collections.singletonList("reader union lacking writer type: FIXED"), Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_BOOLEAN, Collections.singletonList("reader union lacking writer type: BOOLEAN"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, LONG_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: LONG"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, FLOAT_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: FLOAT"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, DOUBLE_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: DOUBLE"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, BYTES_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: BYTES"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, UNION_INT_ARRAY_INT, Collections.singletonList("reader union lacking writer type: ARRAY"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_MAP_INT, Collections.singletonList("reader union lacking writer type: MAP"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_NULL, Collections.singletonList("reader union lacking writer type: NULL"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, + Arguments.of(UNION_INT_RECORD1, UNION_INT_RECORD2, + Collections.singletonList("reader union lacking writer type: RECORD"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_ENUM1_AB, + Collections.singletonList("reader union lacking writer type: ENUM"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_FIXED_4_BYTES, + Collections.singletonList("reader union lacking writer type: FIXED"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_BOOLEAN, + Collections.singletonList("reader union lacking writer type: BOOLEAN"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, LONG_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: LONG"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, FLOAT_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: FLOAT"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, DOUBLE_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: DOUBLE"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, BYTES_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: BYTES"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_ARRAY_INT, + Collections.singletonList("reader union lacking writer type: ARRAY"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_MAP_INT, + Collections.singletonList("reader union lacking writer type: MAP"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_NULL, + Collections.singletonList("reader union lacking writer type: NULL"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, asList("reader union lacking writer type: LONG", "reader union lacking writer type: FLOAT", "reader union lacking writer type: DOUBLE"), - asList("/1", "/2", "/3") }, - { A_DINT_B_DINT_UNION_RECORD1, A_DINT_B_DINT_STRING_UNION_RECORD1, + asList("/1", "/2", "/3")), + Arguments.of(A_DINT_B_DINT_UNION_RECORD1, A_DINT_B_DINT_STRING_UNION_RECORD1, Collections.singletonList("reader union lacking writer type: STRING"), - Collections.singletonList("/fields/1/type/1") } }; - return Arrays.asList(fields); + Collections.singletonList("/fields/1/type/1"))); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public List details; - @Parameter(3) - public List location; - - @Test - public void testMissingUnionBranch() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testMissingUnionBranch(Schema reader, Schema writer, List details, List location) + throws Exception { List types = Collections.nCopies(details.size(), SchemaIncompatibilityType.MISSING_UNION_BRANCH); validateIncompatibleSchemas(reader, writer, types, details, location); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMultiple.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMultiple.java index 23946755b85..456e4b9d178 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMultiple.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMultiple.java @@ -23,12 +23,12 @@ import java.util.List; import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSchemaCompatibilityMultiple { @Test - public void testMultipleIncompatibilities() throws Exception { + void multipleIncompatibilities() throws Exception { Schema reader = SchemaBuilder.record("base").fields() // 0 .name("check_enum_symbols_field").type().enumeration("check_enum_symbols_type").symbols("A", "C").noDefault() diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java index 83c89ab7b76..d20561faae8 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java @@ -17,44 +17,37 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_DINT_B_DENUM_1_RECORD1; +import static org.apache.avro.TestSchemas.A_DINT_B_DENUM_2_RECORD1; +import static org.apache.avro.TestSchemas.EMPTY_RECORD1; +import static org.apache.avro.TestSchemas.EMPTY_RECORD2; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM2_AB_SCHEMA; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityNameMismatch { private static final Schema FIXED_4_ANOTHER_NAME = Schema.createFixed("AnotherName", null, null, 4); - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { ENUM1_AB_SCHEMA, ENUM2_AB_SCHEMA, "expected: Enum2", "/name" }, - { EMPTY_RECORD2, EMPTY_RECORD1, "expected: Record1", "/name" }, - { FIXED_4_BYTES, FIXED_4_ANOTHER_NAME, "expected: AnotherName", "/name" }, - { A_DINT_B_DENUM_1_RECORD1, A_DINT_B_DENUM_2_RECORD1, "expected: Enum2", "/fields/1/type/name" } }; - return Arrays.asList(fields); + public static Stream data() { + return Stream.of(Arguments.of(ENUM1_AB_SCHEMA, ENUM2_AB_SCHEMA, "expected: Enum2", "/name"), + Arguments.of(EMPTY_RECORD2, EMPTY_RECORD1, "expected: Record1", "/name"), + Arguments.of(FIXED_4_BYTES, FIXED_4_ANOTHER_NAME, "expected: AnotherName", "/name"), + Arguments.of(A_DINT_B_DENUM_1_RECORD1, A_DINT_B_DENUM_2_RECORD1, "expected: Enum2", "/fields/1/type/name")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testNameMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testNameMismatchSchemas(Schema reader, Schema writer, String details, String location) throws Exception { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.NAME_MISMATCH, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java index d367caed941..7a21c1a5fcd 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java @@ -17,38 +17,29 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_INT_B_DINT_RECORD1; +import static org.apache.avro.TestSchemas.A_INT_RECORD1; +import static org.apache.avro.TestSchemas.EMPTY_RECORD1; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityReaderFieldMissingDefaultValue { - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { A_INT_RECORD1, EMPTY_RECORD1, "a", "/fields/0" }, { A_INT_B_DINT_RECORD1, EMPTY_RECORD1, "a", "/fields/0" } }; - return Arrays.asList(fields); - } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; + public static Stream data() { + return Stream.of(Arguments.of(A_INT_RECORD1, EMPTY_RECORD1, "a", "/fields/0"), + Arguments.of(A_INT_B_DINT_RECORD1, EMPTY_RECORD1, "a", "/fields/0")); + } - @Test - public void testReaderFieldMissingDefaultValueSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testReaderFieldMissingDefaultValueSchemas(Schema reader, Schema writer, String details, String location) { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.READER_FIELD_MISSING_DEFAULT_VALUE, details, location); } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java index 63dd3ac11a7..247e40404ba 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java @@ -17,82 +17,94 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_INT_RECORD1; +import static org.apache.avro.TestSchemas.BOOLEAN_SCHEMA; +import static org.apache.avro.TestSchemas.BYTES_SCHEMA; +import static org.apache.avro.TestSchemas.DOUBLE_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM2_AB_SCHEMA; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; +import static org.apache.avro.TestSchemas.FLOAT_SCHEMA; +import static org.apache.avro.TestSchemas.INT_ARRAY_SCHEMA; +import static org.apache.avro.TestSchemas.INT_FLOAT_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_LIST_RECORD; +import static org.apache.avro.TestSchemas.INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_MAP_SCHEMA; +import static org.apache.avro.TestSchemas.INT_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_ARRAY_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_LIST_RECORD; +import static org.apache.avro.TestSchemas.LONG_MAP_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_SCHEMA; +import static org.apache.avro.TestSchemas.NULL_SCHEMA; +import static org.apache.avro.TestSchemas.STRING_SCHEMA; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityTypeMismatch { - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/" }, - { NULL_SCHEMA, LONG_SCHEMA, "reader type: NULL not compatible with writer type: LONG", "/" }, - { BOOLEAN_SCHEMA, INT_SCHEMA, "reader type: BOOLEAN not compatible with writer type: INT", "/" }, + public static Stream data() { + return Stream.of( + Arguments.of(NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/"), + Arguments.of(NULL_SCHEMA, LONG_SCHEMA, "reader type: NULL not compatible with writer type: LONG", "/"), + + Arguments.of(BOOLEAN_SCHEMA, INT_SCHEMA, "reader type: BOOLEAN not compatible with writer type: INT", "/"), - { INT_SCHEMA, NULL_SCHEMA, "reader type: INT not compatible with writer type: NULL", "/" }, - { INT_SCHEMA, BOOLEAN_SCHEMA, "reader type: INT not compatible with writer type: BOOLEAN", "/" }, - { INT_SCHEMA, LONG_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/" }, - { INT_SCHEMA, FLOAT_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", "/" }, - { INT_SCHEMA, DOUBLE_SCHEMA, "reader type: INT not compatible with writer type: DOUBLE", "/" }, + Arguments.of(INT_SCHEMA, NULL_SCHEMA, "reader type: INT not compatible with writer type: NULL", "/"), + Arguments.of(INT_SCHEMA, BOOLEAN_SCHEMA, "reader type: INT not compatible with writer type: BOOLEAN", "/"), + Arguments.of(INT_SCHEMA, LONG_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/"), + Arguments.of(INT_SCHEMA, FLOAT_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", "/"), + Arguments.of(INT_SCHEMA, DOUBLE_SCHEMA, "reader type: INT not compatible with writer type: DOUBLE", "/"), - { LONG_SCHEMA, FLOAT_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", "/" }, - { LONG_SCHEMA, DOUBLE_SCHEMA, "reader type: LONG not compatible with writer type: DOUBLE", "/" }, + Arguments.of(LONG_SCHEMA, FLOAT_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", "/"), + Arguments.of(LONG_SCHEMA, DOUBLE_SCHEMA, "reader type: LONG not compatible with writer type: DOUBLE", "/"), - { FLOAT_SCHEMA, DOUBLE_SCHEMA, "reader type: FLOAT not compatible with writer type: DOUBLE", "/" }, + Arguments.of(FLOAT_SCHEMA, DOUBLE_SCHEMA, "reader type: FLOAT not compatible with writer type: DOUBLE", "/"), - { DOUBLE_SCHEMA, STRING_SCHEMA, "reader type: DOUBLE not compatible with writer type: STRING", "/" }, + Arguments.of(DOUBLE_SCHEMA, STRING_SCHEMA, "reader type: DOUBLE not compatible with writer type: STRING", "/"), - { FIXED_4_BYTES, STRING_SCHEMA, "reader type: FIXED not compatible with writer type: STRING", "/" }, + Arguments.of(FIXED_4_BYTES, STRING_SCHEMA, "reader type: FIXED not compatible with writer type: STRING", "/"), - { STRING_SCHEMA, BOOLEAN_SCHEMA, "reader type: STRING not compatible with writer type: BOOLEAN", "/" }, - { STRING_SCHEMA, INT_SCHEMA, "reader type: STRING not compatible with writer type: INT", "/" }, + Arguments.of(STRING_SCHEMA, BOOLEAN_SCHEMA, "reader type: STRING not compatible with writer type: BOOLEAN", + "/"), + Arguments.of(STRING_SCHEMA, INT_SCHEMA, "reader type: STRING not compatible with writer type: INT", "/"), - { BYTES_SCHEMA, NULL_SCHEMA, "reader type: BYTES not compatible with writer type: NULL", "/" }, - { BYTES_SCHEMA, INT_SCHEMA, "reader type: BYTES not compatible with writer type: INT", "/" }, + Arguments.of(BYTES_SCHEMA, NULL_SCHEMA, "reader type: BYTES not compatible with writer type: NULL", "/"), + Arguments.of(BYTES_SCHEMA, INT_SCHEMA, "reader type: BYTES not compatible with writer type: INT", "/"), - { A_INT_RECORD1, INT_SCHEMA, "reader type: RECORD not compatible with writer type: INT", "/" }, + Arguments.of(A_INT_RECORD1, INT_SCHEMA, "reader type: RECORD not compatible with writer type: INT", "/"), - { INT_ARRAY_SCHEMA, LONG_ARRAY_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/items" }, - { INT_MAP_SCHEMA, INT_ARRAY_SCHEMA, "reader type: MAP not compatible with writer type: ARRAY", "/" }, - { INT_ARRAY_SCHEMA, INT_MAP_SCHEMA, "reader type: ARRAY not compatible with writer type: MAP", "/" }, - { INT_MAP_SCHEMA, LONG_MAP_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/values" }, + Arguments.of(INT_ARRAY_SCHEMA, LONG_ARRAY_SCHEMA, "reader type: INT not compatible with writer type: LONG", + "/items"), + Arguments.of(INT_MAP_SCHEMA, INT_ARRAY_SCHEMA, "reader type: MAP not compatible with writer type: ARRAY", "/"), + Arguments.of(INT_ARRAY_SCHEMA, INT_MAP_SCHEMA, "reader type: ARRAY not compatible with writer type: MAP", "/"), + Arguments.of(INT_MAP_SCHEMA, LONG_MAP_SCHEMA, "reader type: INT not compatible with writer type: LONG", + "/values"), - { INT_SCHEMA, ENUM2_AB_SCHEMA, "reader type: INT not compatible with writer type: ENUM", "/" }, - { ENUM2_AB_SCHEMA, INT_SCHEMA, "reader type: ENUM not compatible with writer type: INT", "/" }, + Arguments.of(INT_SCHEMA, ENUM2_AB_SCHEMA, "reader type: INT not compatible with writer type: ENUM", "/"), + Arguments.of(ENUM2_AB_SCHEMA, INT_SCHEMA, "reader type: ENUM not compatible with writer type: INT", "/"), - { FLOAT_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, - "reader type: FLOAT not compatible with writer type: DOUBLE", "/" }, - { LONG_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", "/" }, - { INT_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", "/" }, + Arguments.of(FLOAT_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, + "reader type: FLOAT not compatible with writer type: DOUBLE", "/3"), + Arguments.of(LONG_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", + "/1"), + Arguments.of(INT_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", + "/1"), - { INT_LIST_RECORD, LONG_LIST_RECORD, "reader type: INT not compatible with writer type: LONG", - "/fields/0/type" }, + Arguments.of(INT_LIST_RECORD, LONG_LIST_RECORD, "reader type: INT not compatible with writer type: LONG", + "/fields/0/type"), - { NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/" } }; - return Arrays.asList(fields); + Arguments.of(NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testTypeMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testTypeMismatchSchemas(Schema reader, Schema writer, String details, String location) throws Exception { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.TYPE_MISMATCH, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java index 97b7a7803ce..1351d900abe 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java @@ -18,9 +18,7 @@ package org.apache.avro; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - +import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.BufferedReader; import java.io.IOException; import java.nio.file.Files; @@ -31,52 +29,37 @@ import java.util.Locale; import org.apache.avro.util.CaseFinder; -import org.junit.Test; -import org.junit.experimental.runners.Enclosed; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; -@RunWith(Enclosed.class) public class TestSchemaNormalization { - @RunWith(Parameterized.class) + @Nested public static class TestCanonical { - String input, expectedOutput; - - public TestCanonical(String i, String o) { - input = i; - expectedOutput = o; - } - @Parameters public static List cases() throws IOException { return CaseFinder.find(data(), "canonical", new ArrayList<>()); } - @Test - public void testCanonicalization() throws Exception { - assertEquals(SchemaNormalization.toParsingForm(new Schema.Parser().parse(input)), expectedOutput); + @ParameterizedTest + @MethodSource("cases") + void canonicalization(String input, String expectedOutput) { + assertEquals(SchemaNormalization.toParsingForm(SchemaParser.parseSingle(input)), expectedOutput); } } - @RunWith(Parameterized.class) + @Nested public static class TestFingerprint { - String input, expectedOutput; - public TestFingerprint(String i, String o) { - input = i; - expectedOutput = o; - } - - @Parameters public static List cases() throws IOException { return CaseFinder.find(data(), "fingerprint", new ArrayList<>()); } - @Test - public void testCanonicalization() throws Exception { - Schema s = new Schema.Parser().parse(input); + @ParameterizedTest + @MethodSource("cases") + void canonicalization(String input, String expectedOutput) { + Schema s = SchemaParser.parseSingle(input); long carefulFP = altFingerprint(SchemaNormalization.toParsingForm(s)); assertEquals(carefulFP, Long.parseLong(expectedOutput)); assertEqHex(carefulFP, SchemaNormalization.parsingFingerprint64(s)); @@ -84,25 +67,19 @@ public void testCanonicalization() throws Exception { } // see AVRO-1493 - @RunWith(Parameterized.class) + @Nested public static class TestFingerprintInternationalization { - String input, expectedOutput; - - public TestFingerprintInternationalization(String i, String o) { - input = i; - expectedOutput = o; - } - @Parameters public static List cases() throws IOException { return CaseFinder.find(data(), "fingerprint", new ArrayList<>()); } - @Test - public void testCanonicalization() throws Exception { + @ParameterizedTest + @MethodSource("cases") + void canonicalization(String input, String expectedOutput) { Locale originalDefaultLocale = Locale.getDefault(); Locale.setDefault(Locale.forLanguageTag("tr")); - Schema s = new Schema.Parser().parse(input); + Schema s = SchemaParser.parseSingle(input); long carefulFP = altFingerprint(SchemaNormalization.toParsingForm(s)); assertEquals(carefulFP, Long.parseLong(expectedOutput)); assertEqHex(carefulFP, SchemaNormalization.parsingFingerprint64(s)); @@ -152,8 +129,7 @@ private static long altExtend(long poly, int degree, long fp, byte[] b) { private static final byte[] POSTFIX = { 0, 0, 0, 0, 0, 0, 0, 0 }; private static void assertEqHex(long expected, long actual) { - String m = format("0x%016x != 0x%016x", expected, actual); - assertTrue(m, expected == actual); + assertEquals(expected, actual, () -> format("0x%016x != 0x%016x", expected, actual)); } private static String format(String f, Object... args) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaParser.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaParser.java new file mode 100644 index 00000000000..88a4a040962 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaParser.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import com.fasterxml.jackson.core.JsonParseException; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.StringReader; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static java.util.Collections.singletonList; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestSchemaParser { + private static final Schema SCHEMA_REAL = Schema.createFixed("Real", null, "tests", 42); + private static final String SCHEMA_JSON = SchemaFormatter.getInstance("json").format(SCHEMA_REAL); + private static final Charset[] UTF_CHARSETS = { StandardCharsets.UTF_8, StandardCharsets.UTF_16LE, + StandardCharsets.UTF_16BE }; + + @Test + void testStaticParseText() { + Schema schema = SchemaParser.parseSingle(SCHEMA_JSON); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testStaticParsePath() throws IOException { + Path tempFile = Files.createTempFile("TestSchemaParser", null); + Files.write(tempFile, singletonList(SCHEMA_JSON)); + + Schema schema = SchemaParser.parseSingle(tempFile); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseFile() throws IOException { + Path tempFile = Files.createTempFile("TestSchemaParser", null); + Files.write(tempFile, singletonList(SCHEMA_JSON)); + + Schema schema = new SchemaParser().parse(tempFile.toFile()).mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParsePath() throws IOException { + Path tempFile = Files.createTempFile("TestSchemaParser", null); + Files.write(tempFile, singletonList(SCHEMA_JSON)); + + Schema schema = new SchemaParser().parse(tempFile).mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseURI() throws IOException { + Path tempFile = Files.createTempFile("TestSchemaParser", null); + Charset charset = UTF_CHARSETS[(int) Math.floor(UTF_CHARSETS.length * Math.random())]; + Files.write(tempFile, singletonList(SCHEMA_JSON), charset); + + Schema schema = new SchemaParser().parse(tempFile.toUri(), null).mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseReader() throws IOException { + Schema schema = new SchemaParser().parse(new StringReader(SCHEMA_JSON)).mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseStream() throws IOException { + Schema schema = new SchemaParser().parse(new ByteArrayInputStream(SCHEMA_JSON.getBytes(StandardCharsets.UTF_16))) + .mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseTextWithFallbackJsonParser() { + Schema schema = new SchemaParser().parse(SCHEMA_JSON).mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseByCustomParser() { + SchemaParser.ParseResult parseResult = new SchemaParser().parse(DummySchemaParser.SCHEMA_TEXT_ONE); + List namedSchemas = parseResult.parsedNamedSchemas(); + assertEquals(1, namedSchemas.size()); + assertEquals(DummySchemaParser.FIXED_SCHEMA, namedSchemas.get(0)); + Schema schema = parseResult.mainSchema(); + assertEquals(DummySchemaParser.FIXED_SCHEMA, schema); + } + + @Test + void testSingleParseError() { + SchemaParseException parseException = assertThrows(SchemaParseException.class, + () -> new SchemaParser().parse("foo").mainSchema()); + assertEquals(JsonParseException.class, parseException.getCause().getClass()); + assertEquals(0, parseException.getSuppressed().length); + } + + @Test + void testMultipleParseErrors() { + SchemaParseException parseException = assertThrows(SchemaParseException.class, + () -> new SchemaParser().parse(DummySchemaParser.SCHEMA_TEXT_ERROR).mainSchema()); + assertTrue(parseException.getMessage().startsWith("Could not parse the schema")); + Throwable[] suppressed = parseException.getSuppressed(); + assertEquals(2, suppressed.length); + assertEquals(DummySchemaParser.ERROR_MESSAGE, suppressed[0].getMessage()); + assertEquals(JsonParseException.class, suppressed[1].getCause().getClass()); + } + + @Test + void testIOFailureWhileParsingText() { + AvroRuntimeException exception = assertThrows(AvroRuntimeException.class, + () -> new SchemaParser().parse(DummySchemaParser.SCHEMA_TEXT_IO_ERROR).mainSchema()); + assertEquals(IOException.class, exception.getCause().getClass()); + assertEquals(DummySchemaParser.IO_ERROR_MESSAGE, exception.getCause().getMessage()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidateDefault.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidateDefault.java new file mode 100644 index 00000000000..a86519c7560 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidateDefault.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.apache.avro.generic.GenericData; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; +import org.apache.avro.reflect.ReflectDatumWriter; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Objects; +import java.util.function.Function; + +public class TestSchemaValidateDefault { + + @Test + public void valueReadWithCorrectDefaultValue() throws IOException { + + ExampleRecord writtenValue = new ExampleRecord(new ComplexValue(42L), new ComplexValue(666L)); + byte[] bytes = getSerializer(ExampleRecord.SCHEMA_WITH_ONE_FIELD).apply(writtenValue); + + ReflectDatumReader reader = new ReflectDatumReader<>(ExampleRecord.SCHEMA_WITH_ONE_FIELD, + ExampleRecord.SCHEMA_WITH_TWO_FIELDS, ReflectData.get()); + Decoder decoder = DecoderFactory.get().jsonDecoder(ExampleRecord.SCHEMA_WITH_ONE_FIELD, + new ByteArrayInputStream(bytes)); + ExampleRecord deserializedValue = reader.read(null, decoder); + + Assertions.assertNotNull(deserializedValue.getValue2(), "Null get value2"); + Assertions.assertEquals(15L, deserializedValue.getValue2().getValue()); + } + + public static Function getSerializer(Schema writerSchema) { + Objects.requireNonNull(writerSchema, "writerSchema must not be null"); + + ReflectDatumWriter writer = new ReflectDatumWriter<>(writerSchema, new ReflectData()); + return object -> { + try { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().jsonEncoder(writerSchema, stream); + writer.write(object, encoder); + encoder.flush(); + return stream.toByteArray(); + } catch (IOException e) { + throw new IllegalStateException(String.format("Avro failed to encode %s to schema %s", object, writerSchema), + e); + } + }; + } + + public static Function getDeserializer(Class readClass, Schema readerSchema, Schema writerSchema) { + Objects.requireNonNull(readClass, "readClass must not be null"); + Objects.requireNonNull(readerSchema, "readerSchema must not be null"); + Objects.requireNonNull(writerSchema, "writerSchema must not be null"); + + ReflectDatumReader reader = new ReflectDatumReader<>(writerSchema, readerSchema, new ReflectData()); + return (byte[] bytes) -> { + try { + Decoder decoder = DecoderFactory.get().jsonDecoder(writerSchema, new ByteArrayInputStream(bytes)); + T readValue = reader.read(null, decoder); + return readValue; + } catch (IOException e) { + throw new IllegalStateException(String.format("Avro failed to decode %s to %s", new String(bytes), readClass), + e); + } + }; + } + + static final Schema SCHEMA = SchemaBuilder.record("org.apache.avro.TestSchemaValidateDefault.ComplexValue").fields() + .optionalLong("value").endRecord(); + + public static class ComplexValue { + + private Long value; + + public ComplexValue() { + } + + public ComplexValue(Long value) { + this.value = value; + } + + public Long getValue() { + return this.value; + } + + @Override + public String toString() { + return "{" + "\"value\": { \"long\": " + this.value + "}}"; + } + } + + public static class ExampleRecord { + public static final Schema SCHEMA_WITH_ONE_FIELD; + public static final Schema SCHEMA_WITH_TWO_FIELDS; + + static { + SCHEMA_WITH_ONE_FIELD = SchemaBuilder.record("org.apache.avro.TestSchemaValidateDefault.ExampleRecord").fields() + .name("value1").type(TestSchemaValidateDefault.SCHEMA).noDefault().endRecord(); + + GenericData.Record record = new GenericData.Record(TestSchemaValidateDefault.SCHEMA); + record.put("value", 15L); + + SCHEMA_WITH_TWO_FIELDS = SchemaBuilder.record("org.apache.avro.TestSchemaValidateDefault.ExampleRecord").fields() + .name("value1").type(TestSchemaValidateDefault.SCHEMA).noDefault().name("value2") + .type(TestSchemaValidateDefault.SCHEMA).withDefault(record).endRecord(); + } + + private ComplexValue value1; + private ComplexValue value2; + + public ExampleRecord() { + } + + public ExampleRecord(ComplexValue value1, ComplexValue value2) { + this.value1 = value1; + this.value2 = value2; + } + + public ComplexValue getValue1() { + return this.value1; + } + + public ComplexValue getValue2() { + return this.value2; + } + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java index 61c354ef143..84f3b9a5fe2 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java @@ -18,22 +18,18 @@ package org.apache.avro; import static org.apache.avro.TestSchemas.*; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.apache.avro.reflect.ReflectData; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; +import org.junit.jupiter.api.Test; public class TestSchemaValidation { - @Rule - public ExpectedException expectedException = ExpectedException.none(); - /** Collection of reader/writer schema pair that are compatible. */ public static final List COMPATIBLE_READER_WRITER_TEST_CASES = list( new ReaderWriter(BOOLEAN_SCHEMA, BOOLEAN_SCHEMA), @@ -136,7 +132,8 @@ public class TestSchemaValidation { new ReaderWriter(INT_ARRAY_SCHEMA, LONG_ARRAY_SCHEMA), new ReaderWriter(INT_MAP_SCHEMA, INT_ARRAY_SCHEMA), new ReaderWriter(INT_ARRAY_SCHEMA, INT_MAP_SCHEMA), new ReaderWriter(INT_MAP_SCHEMA, LONG_MAP_SCHEMA), - new ReaderWriter(ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA), new ReaderWriter(ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA), + // new ReaderWriter(ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA), + // new ReaderWriter(ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA), new ReaderWriter(ENUM1_AB_SCHEMA, ENUM2_AB_SCHEMA), new ReaderWriter(INT_SCHEMA, ENUM2_AB_SCHEMA), new ReaderWriter(ENUM2_AB_SCHEMA, INT_SCHEMA), @@ -147,10 +144,10 @@ public class TestSchemaValidation { new ReaderWriter(FLOAT_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA), new ReaderWriter(LONG_SCHEMA, INT_FLOAT_UNION_SCHEMA), new ReaderWriter(INT_SCHEMA, INT_FLOAT_UNION_SCHEMA), - new ReaderWriter(EMPTY_RECORD2, EMPTY_RECORD1), new ReaderWriter(A_INT_RECORD1, EMPTY_RECORD1), - new ReaderWriter(A_INT_B_DINT_RECORD1, EMPTY_RECORD1), + // new ReaderWriter(EMPTY_RECORD2, EMPTY_RECORD1), + new ReaderWriter(A_INT_RECORD1, EMPTY_RECORD1), new ReaderWriter(A_INT_B_DINT_RECORD1, EMPTY_RECORD1), - new ReaderWriter(INT_LIST_RECORD, LONG_LIST_RECORD), + // new ReaderWriter(INT_LIST_RECORD, LONG_LIST_RECORD), new ReaderWriter(NULL_SCHEMA, INT_SCHEMA)); @@ -169,7 +166,7 @@ public class TestSchemaValidation { .name("b").type().longType().noDefault().name("c").type().intType().intDefault(0).endRecord(); @Test - public void testAllTypes() throws SchemaValidationException { + void allTypes() throws SchemaValidationException { Schema s = SchemaBuilder.record("r").fields().requiredBoolean("boolF").requiredInt("intF").requiredLong("longF") .requiredFloat("floatF").requiredDouble("doubleF").requiredString("stringF").requiredBytes("bytesF") .name("fixedF1").type().fixed("F1").size(1).noDefault().name("enumF").type().enumeration("E1").symbols("S") @@ -180,46 +177,48 @@ public void testAllTypes() throws SchemaValidationException { } @Test - public void testReadOnePrior() throws SchemaValidationException { + void readOnePrior() throws SchemaValidationException { testValidatorPasses(builder.canReadStrategy().validateLatest(), rec3, rec); testValidatorPasses(builder.canReadStrategy().validateLatest(), rec5, rec3); testValidatorFails(builder.canReadStrategy().validateLatest(), rec4, rec); } @Test - public void testReadAllPrior() throws SchemaValidationException { + void readAllPrior() throws SchemaValidationException { testValidatorPasses(builder.canReadStrategy().validateAll(), rec3, rec, rec2); testValidatorFails(builder.canReadStrategy().validateAll(), rec4, rec, rec2, rec3); testValidatorFails(builder.canReadStrategy().validateAll(), rec5, rec, rec2, rec3); } @Test - public void testOnePriorCanRead() throws SchemaValidationException { + void onePriorCanRead() throws SchemaValidationException { testValidatorPasses(builder.canBeReadStrategy().validateLatest(), rec, rec3); testValidatorFails(builder.canBeReadStrategy().validateLatest(), rec, rec4); } @Test - public void testAllPriorCanRead() throws SchemaValidationException { + void allPriorCanRead() throws SchemaValidationException { testValidatorPasses(builder.canBeReadStrategy().validateAll(), rec, rec3, rec2); testValidatorFails(builder.canBeReadStrategy().validateAll(), rec, rec4, rec3, rec2); } @Test - public void testOnePriorCompatible() throws SchemaValidationException { + void onePriorCompatible() throws SchemaValidationException { testValidatorPasses(builder.mutualReadStrategy().validateLatest(), rec, rec3); testValidatorFails(builder.mutualReadStrategy().validateLatest(), rec, rec4); } @Test - public void testAllPriorCompatible() throws SchemaValidationException { + void allPriorCompatible() throws SchemaValidationException { testValidatorPasses(builder.mutualReadStrategy().validateAll(), rec, rec3, rec2); testValidatorFails(builder.mutualReadStrategy().validateAll(), rec, rec4, rec3, rec2); } - @Test(expected = AvroRuntimeException.class) - public void testInvalidBuild() { - builder.strategy(null).validateAll(); + @Test + void invalidBuild() { + assertThrows(AvroRuntimeException.class, () -> { + builder.strategy(null).validateAll(); + }); } public static class Point { @@ -241,33 +240,33 @@ public static class Circle { .endRecord(); @Test - public void testReflectMatchStructure() throws SchemaValidationException { + void reflectMatchStructure() throws SchemaValidationException { testValidatorPasses(builder.canBeReadStrategy().validateAll(), circleSchemaDifferentNames, ReflectData.get().getSchema(Circle.class)); } @Test - public void testReflectWithAllowNullMatchStructure() throws SchemaValidationException { + void reflectWithAllowNullMatchStructure() throws SchemaValidationException { testValidatorPasses(builder.canBeReadStrategy().validateAll(), circleSchemaDifferentNames, ReflectData.AllowNull.get().getSchema(Circle.class)); } @Test - public void testUnionWithIncompatibleElements() throws SchemaValidationException { + void unionWithIncompatibleElements() throws SchemaValidationException { Schema union1 = Schema.createUnion(Collections.singletonList(rec)); Schema union2 = Schema.createUnion(Collections.singletonList(rec4)); testValidatorFails(builder.canReadStrategy().validateAll(), union2, union1); } @Test - public void testUnionWithCompatibleElements() throws SchemaValidationException { + void unionWithCompatibleElements() throws SchemaValidationException { Schema union1 = Schema.createUnion(Collections.singletonList(rec)); Schema union2 = Schema.createUnion(Collections.singletonList(rec3)); testValidatorPasses(builder.canReadStrategy().validateAll(), union2, union1); } @Test - public void testSchemaCompatibilitySuccesses() throws SchemaValidationException { + void schemaCompatibilitySuccesses() throws SchemaValidationException { // float-union-to-int/long-union does not work... // and neither does recursive types for (ReaderWriter tc : COMPATIBLE_READER_WRITER_TEST_CASES) { @@ -276,14 +275,19 @@ public void testSchemaCompatibilitySuccesses() throws SchemaValidationException } @Test - public void testSchemaCompatibilityFailures() throws SchemaValidationException { + void schemaCompatibilityFailures() { for (ReaderWriter tc : INCOMPATIBLE_READER_WRITER_TEST_CASES) { Schema reader = tc.getReader(); Schema writer = tc.getWriter(); - expectedException.expect(SchemaValidationException.class); - expectedException.expectMessage("Unable to read schema: \n" + writer.toString()); + + String expectedMsg = "Unable to read schema: \n" + writer.toString(false); SchemaValidator validator = builder.canReadStrategy().validateAll(); - validator.validate(reader, Collections.singleton(writer)); + SchemaValidationException exception = assertThrows(SchemaValidationException.class, + () -> validator.validate(reader, Collections.singleton(writer)), + "No or wrong exception for (" + reader.toString(false) + "; " + writer.toString(false) + ")"); + assertTrue(exception.getMessage().contains("Unable to read schema:"), + "'" + expectedMsg + "' != '" + exception.getMessage() + "'"); + } } @@ -309,17 +313,17 @@ private void testValidatorFails(SchemaValidator validator, Schema schemaFails, S } catch (SchemaValidationException sve) { threw = true; } - Assert.assertTrue(threw); + assertTrue(threw); } - public static final org.apache.avro.Schema recursiveSchema = new org.apache.avro.Schema.Parser().parse( + public static final org.apache.avro.Schema recursiveSchema = SchemaParser.parseSingle( "{\"type\":\"record\",\"name\":\"Node\",\"namespace\":\"avro\",\"fields\":[{\"name\":\"value\",\"type\":[\"null\",\"Node\"],\"default\":null}]}"); /** * Unit test to verify that recursive schemas can be validated. See AVRO-2122. */ @Test - public void testRecursiveSchemaValidation() throws SchemaValidationException { + void recursiveSchemaValidation() throws SchemaValidationException { // before AVRO-2122, this would cause a StackOverflowError final SchemaValidator backwardValidator = builder.canReadStrategy().validateLatest(); backwardValidator.validate(recursiveSchema, Collections.singletonList(recursiveSchema)); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaWarnings.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaWarnings.java index e14ec626b73..3f8165d0474 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaWarnings.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaWarnings.java @@ -17,9 +17,10 @@ */ package org.apache.avro; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; import java.io.ByteArrayOutputStream; import java.io.PrintStream; @@ -40,13 +41,13 @@ public class TestSchemaWarnings { */ private final ByteArrayOutputStream capturedErr = new ByteArrayOutputStream(); - @Before + @BeforeEach public void setupStdErr() { capturedErr.reset(); System.setErr(new PrintStream(capturedErr)); } - @AfterClass + @AfterAll public static void restoreStdErr() { System.setErr(originalErr); } @@ -59,12 +60,14 @@ public String getCapturedStdErr() { } @Test - public void testWarnWhenTheLogicalTypeIsOnTheField() { + // FIXME: Find a different way of capturing the output + @DisabledIfEnvironmentVariable(named = "WithinInvokerPlugin", matches = "true", disabledReason = "Redirecting stderr does not work within the invoker plugin") + void warnWhenTheLogicalTypeIsOnTheField() { // A record with a single int field. Schema s = SchemaBuilder.record("A").fields().requiredInt("a1").endRecord(); // Force reparsing the schema, and no warning should be logged. - s = new Schema.Parser().parse(s.toString()); + s = SchemaParser.parseSingle(s.toString()); assertThat(s.getField("a1").schema().getLogicalType(), nullValue()); assertThat(getCapturedStdErr(), is("")); @@ -74,7 +77,7 @@ public void testWarnWhenTheLogicalTypeIsOnTheField() { assertThat(s.getField("a1").schema().getLogicalType(), nullValue()); // Force reparsing the schema, and a warning should be logged. - s = new Schema.Parser().parse(s.toString()); + s = SchemaParser.parseSingle(s.toString()); assertThat(getCapturedStdErr(), containsString("Ignored the A.a1.logicalType property (\"date\"). It should" + " probably be nested inside the \"type\" for the field.")); assertThat(s.getField("a1").schema().getLogicalType(), nullValue()); @@ -86,14 +89,16 @@ public void testWarnWhenTheLogicalTypeIsOnTheField() { // Force reparsing the schema. No warning should be logged, and the logical type // should be applied. - s = new Schema.Parser().parse(s.toString()); + s = SchemaParser.parseSingle(s.toString()); assertThat(getCapturedStdErr(), is("")); assertThat(s.getField("a1").schema().getLogicalType(), is(LogicalTypes.date())); } @Test - public void testWarnWhenTheLogicalTypeIsIgnored() { + // FIXME: Find a different way of capturing the output + @DisabledIfEnvironmentVariable(named = "WithinInvokerPlugin", matches = "true", disabledReason = "Redirecting stderr does not work within the invoker plugin") + void warnWhenTheLogicalTypeIsIgnored() { // A record with a single int field. Schema s = SchemaBuilder.record("A").fields().requiredLong("a1").endRecord(); @@ -102,7 +107,7 @@ public void testWarnWhenTheLogicalTypeIsIgnored() { s.getField("a1").schema().addProp(LOGICAL_TYPE_PROP, LogicalTypes.date().getName()); // Force reparsing the schema. No warning should be logged, and the logical type // should be applied. - s = new Schema.Parser().parse(s.toString()); + s = SchemaParser.parseSingle(s.toString()); assertThat(s.getField("a1").schema().getLogicalType(), nullValue()); assertThat(getCapturedStdErr(), containsString("Ignoring invalid logical type for name: date")); } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemas.java index 30cabadb563..a37de9bfb0d 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemas.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemas.java @@ -17,14 +17,17 @@ */ package org.apache.avro; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Collections; import org.apache.avro.Schema.Field; -/** Schemas used by other tests in this package. Therefore package protected. */ +/** + * Schemas used by other tests in this package. Therefore mostly package + * protected. + */ public class TestSchemas { static final Schema NULL_SCHEMA = Schema.create(Schema.Type.NULL); @@ -45,6 +48,11 @@ public class TestSchemas { static final Schema STRING_MAP_SCHEMA = Schema.createMap(STRING_SCHEMA); static final Schema ENUM1_AB_SCHEMA = Schema.createEnum("Enum1", null, null, list("A", "B")); + static final Schema ENUM1_AB_SCHEMA_DEFAULT = Schema.createEnum("Enum1", null, null, list("A", "B"), "A"); + public static final Schema ENUM1_AB_SCHEMA_NAMESPACE_1 = Schema.createEnum("Enum1", null, "namespace1", + list("A", "B")); + public static final Schema ENUM1_AB_SCHEMA_NAMESPACE_2 = Schema.createEnum("Enum1", null, "namespace2", + list("A", "B")); static final Schema ENUM1_ABC_SCHEMA = Schema.createEnum("Enum1", null, null, list("A", "B", "C")); static final Schema ENUM1_BC_SCHEMA = Schema.createEnum("Enum1", null, null, list("B", "C")); static final Schema ENUM2_AB_SCHEMA = Schema.createEnum("Enum2", null, null, list("A", "B")); @@ -108,6 +116,9 @@ public class TestSchemas { static final Schema NS_INNER_RECORD1 = Schema.createRecord("InnerRecord1", null, "ns1", false); static final Schema NS_INNER_RECORD2 = Schema.createRecord("InnerRecord1", null, "ns2", false); + static final Schema WITHOUT_NS = Schema.createRecord("Record", null, null, false); + static final Schema WITH_NS = Schema.createRecord("ns.Record", null, null, false); + static { EMPTY_RECORD1.setFields(Collections.emptyList()); EMPTY_RECORD2.setFields(Collections.emptyList()); @@ -137,6 +148,9 @@ public class TestSchemas { .setFields(list(new Schema.Field("f1", Schema.createUnion(NULL_SCHEMA, Schema.createArray(NS_INNER_RECORD1))))); NS_RECORD2 .setFields(list(new Schema.Field("f1", Schema.createUnion(NULL_SCHEMA, Schema.createArray(NS_INNER_RECORD2))))); + + WITH_NS.setFields(list(new Field("f1", INT_SCHEMA, null, null))); + WITHOUT_NS.setFields(list(new Field("f1", INT_SCHEMA, null, null))); } // Recursive records @@ -180,7 +194,7 @@ static ArrayList list(E... elements) { static void assertSchemaContains(Schema schemaSubset, Schema original) { String subset = schemaSubset.toString(false); String whole = original.toString(false); - assertTrue(String.format("Subset '%s' not found in '%s'", subset, whole), whole.contains(subset)); + assertTrue(whole.contains(subset), String.format("Subset '%s' not found in '%s'", subset, whole)); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java b/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java new file mode 100644 index 00000000000..0da39179506 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java @@ -0,0 +1,164 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +import static org.apache.avro.SystemLimitException.*; +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; + +import java.util.function.Function; + +public class TestSystemLimitException { + + /** Delegated here for package visibility. */ + public static final int MAX_ARRAY_VM_LIMIT = SystemLimitException.MAX_ARRAY_VM_LIMIT; + + public static final String ERROR_NEGATIVE = "Malformed data. Length is negative: -1"; + public static final String ERROR_VM_LIMIT_BYTES = "Cannot read arrays longer than " + MAX_ARRAY_VM_LIMIT + + " bytes in Java library"; + public static final String ERROR_VM_LIMIT_COLLECTION = "Cannot read collections larger than " + MAX_ARRAY_VM_LIMIT + + " items in Java library"; + public static final String ERROR_VM_LIMIT_STRING = "Cannot read strings longer than " + MAX_ARRAY_VM_LIMIT + " bytes"; + + /** Delegated here for package visibility. */ + public static void resetLimits() { + SystemLimitException.resetLimits(); + } + + @AfterEach + void reset() { + System.clearProperty(MAX_BYTES_LENGTH_PROPERTY); + System.clearProperty(MAX_COLLECTION_LENGTH_PROPERTY); + System.clearProperty(MAX_STRING_LENGTH_PROPERTY); + resetLimits(); + } + + /** + * A helper method that tests the consistent limit handling from system + * properties. + * + * @param f The function to be tested. + * @param sysProperty The system property used to control the custom limit. + * @param errorVmLimit The error message used when the property would be + * over the VM limit. + * @param errorCustomLimit The error message used when the property would be + * over the custom limit of 1000. + */ + void helpCheckSystemLimits(Function f, String sysProperty, String errorVmLimit, + String errorCustomLimit) { + // Correct values pass through + assertEquals(0, f.apply(0L)); + assertEquals(1024, f.apply(1024L)); + assertEquals(MAX_ARRAY_VM_LIMIT, f.apply((long) MAX_ARRAY_VM_LIMIT)); + + // Values that exceed the default system limits throw exceptions + Exception ex = assertThrows(UnsupportedOperationException.class, () -> f.apply(Long.MAX_VALUE)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> f.apply((long) MAX_ARRAY_VM_LIMIT + 1)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> f.apply(-1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + + // Setting the system property to provide a custom limit. + System.setProperty(sysProperty, Long.toString(1000L)); + resetLimits(); + + // Correct values pass through + assertEquals(0, f.apply(0L)); + assertEquals(102, f.apply(102L)); + + // Values that exceed the custom system limits throw exceptions + ex = assertThrows(UnsupportedOperationException.class, () -> f.apply((long) MAX_ARRAY_VM_LIMIT + 1)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(SystemLimitException.class, () -> f.apply(1024L)); + assertEquals(errorCustomLimit, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> f.apply(-1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + } + + @Test + void testCheckMaxBytesLength() { + helpCheckSystemLimits(SystemLimitException::checkMaxBytesLength, MAX_BYTES_LENGTH_PROPERTY, ERROR_VM_LIMIT_BYTES, + "Bytes length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxCollectionLengthFromZero() { + helpCheckSystemLimits(l -> checkMaxCollectionLength(0L, l), MAX_COLLECTION_LENGTH_PROPERTY, + ERROR_VM_LIMIT_COLLECTION, "Collection length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxStringLength() { + helpCheckSystemLimits(SystemLimitException::checkMaxStringLength, MAX_STRING_LENGTH_PROPERTY, ERROR_VM_LIMIT_STRING, + "String length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxCollectionLengthFromNonZero() { + // Correct values pass through + assertEquals(10, checkMaxCollectionLength(10L, 0L)); + assertEquals(MAX_ARRAY_VM_LIMIT, checkMaxCollectionLength(10L, MAX_ARRAY_VM_LIMIT - 10L)); + assertEquals(MAX_ARRAY_VM_LIMIT, checkMaxCollectionLength(MAX_ARRAY_VM_LIMIT - 10L, 10L)); + + // Values that exceed the default system limits throw exceptions + Exception ex = assertThrows(UnsupportedOperationException.class, + () -> checkMaxCollectionLength(10L, MAX_ARRAY_VM_LIMIT - 9L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, + () -> checkMaxCollectionLength(SystemLimitException.MAX_ARRAY_VM_LIMIT - 9L, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(10L, Long.MAX_VALUE - 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(Long.MAX_VALUE - 10L, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Overflow that adds to negative + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(10L, Long.MAX_VALUE)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(Long.MAX_VALUE, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(AvroRuntimeException.class, () -> checkMaxCollectionLength(10L, -1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> checkMaxCollectionLength(-1L, 10L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + + // Setting the system property to provide a custom limit. + System.setProperty(MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(1000L)); + resetLimits(); + + // Correct values pass through + assertEquals(10, checkMaxCollectionLength(10L, 0L)); + assertEquals(102, checkMaxCollectionLength(10L, 92L)); + assertEquals(102, checkMaxCollectionLength(92L, 10L)); + + // Values that exceed the custom system limits throw exceptions + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(MAX_ARRAY_VM_LIMIT, 1)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(1, MAX_ARRAY_VM_LIMIT)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(SystemLimitException.class, () -> checkMaxCollectionLength(999, 25)); + assertEquals("Collection length 1024 exceeds maximum allowed", ex.getMessage()); + ex = assertThrows(SystemLimitException.class, () -> checkMaxCollectionLength(25, 999)); + assertEquals("Collection length 1024 exceeds maximum allowed", ex.getMessage()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestUnionError.java b/lang/java/avro/src/test/java/org/apache/avro/TestUnionError.java new file mode 100644 index 00000000000..d5847ba36a9 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestUnionError.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; + +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class TestUnionError { + + @Test + void unionErrorMessage() throws IOException { + String writerSchemaJson = " {\n" + " \"type\" : \"record\",\n" + + " \"name\" : \"C\",\n" + " \"fields\" : [ {\n" + + " \"name\" : \"c\",\n" + " \"type\" : [ {\n" + + " \"type\" : \"record\",\n" + " \"name\" : \"A\",\n" + + " \"fields\" : [ {\n" + " \"name\" : \"amount\",\n" + + " \"type\" : \"int\"\n" + " } ]\n" + " }, {\n" + + " \"type\" : \"record\",\n" + " \"name\" : \"B\",\n" + + " \"fields\" : [ {\n" + " \"name\" : \"amount1\",\n" + + " \"type\" : \"int\"\n" + " } ]\n" + " } ]\n" + + " } ]\n" + " }"; + Schema writerSchema = SchemaParser.parseSingle(writerSchemaJson); + + String readerSchemaJson = " {\n" + " \"type\" : \"record\",\n" + " \"name\" : \"C1\",\n" + + " \"fields\" : [ {\n" + " \"name\" : \"c\",\n" + + " \"type\" : [ {\n" + " \"type\" : \"record\",\n" + + " \"name\" : \"A\",\n" + " \"fields\" : [ {\n" + + " \"name\" : \"amount\",\n" + " \"type\" : \"int\"\n" + + " } ]\n" + " }, \"float\" ]\n" + " } ]\n" + " }"; + Schema readerSchema = SchemaParser.parseSingle(readerSchemaJson); + + List unionSchemas = writerSchema.getField("c").schema().getTypes(); + + GenericRecord r = new GenericData.Record(writerSchema); + GenericRecord b = new GenericData.Record(unionSchemas.get(1)); + b.put("amount1", 12); + r.put("c", b); + + ByteArrayOutputStream outs = new ByteArrayOutputStream(); + GenericDatumWriter datumWriter = new GenericDatumWriter<>(writerSchema); + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outs, null); + datumWriter.write(r, encoder); + encoder.flush(); + + InputStream ins = new ByteArrayInputStream(outs.toByteArray()); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(ins, null); + + GenericData data = new GenericData(); + data.setFastReaderEnabled(false); + GenericDatumReader datumReader = new GenericDatumReader<>(writerSchema, readerSchema, data); + AvroTypeException avroException = assertThrows(AvroTypeException.class, () -> datumReader.read(null, decoder)); + assertEquals("Field \"c\" content mismatch: Found B, expecting union[A, float]", avroException.getMessage()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestUnionSelfReference.java b/lang/java/avro/src/test/java/org/apache/avro/TestUnionSelfReference.java index b9bb4dd6c65..1efa853dd86 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestUnionSelfReference.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestUnionSelfReference.java @@ -19,11 +19,11 @@ import org.slf4j.LoggerFactory; -import static org.junit.Assert.*; - import org.apache.avro.Schema.Field; + +import static org.junit.jupiter.api.Assertions.assertEquals; import org.apache.avro.Schema.Type; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; public class TestUnionSelfReference { @@ -31,24 +31,19 @@ public class TestUnionSelfReference { @SuppressWarnings("unused") private static final Logger LOG = LoggerFactory.getLogger(TestUnionSelfReference.class); - private static final String SIMPLE_BINARY_TREE = "{" + " \"namespace\": \"tree\"," + " \"type\": \"record\"," - + " \"name\": \"Node\"," + " \"fields\": [" + " {" + " \"name\": \"left\"," - + " \"type\": [" + " \"null\"," + " {" + " \"type\": \"Node\"" + " }" - + " ]," + " \"default\": null" + " }," + " {" + " \"name\": \"right\"," - + " \"type\": [" + " \"null\"," + " {" + " \"type\": \"Node\"" + " }" - + " ]," + " \"default\": null" + " }" + " ]" + " }"; + private static final String SIMPLE_BINARY_TREE = "{" + + "\"namespace\":\"tree\",\"type\":\"record\",\"name\":\"Node\",\"fields\":[" + + "{\"name\":\"left\",\"type\":[\"null\",\"Node\"],\"default\":null}," + + "{\"name\":\"right\",\"type\":[\"null\",\"Node\"],\"default\":null}]}"; - private static final String THREE_TYPE_UNION = "{" + " \"namespace\": \"tree\"," + " \"type\": \"record\"," - + " \"name\": \"Node\"," + " \"fields\": [" + " {" + " \"name\": \"left\"," - + " \"type\": [" + " \"null\"," + " \"string\"," + " {" - + " \"type\": \"Node\"" + " }" + " ]," + " \"default\": null" + " }," - + " {" + " \"name\": \"right\"," + " \"type\": [" + " \"null\"," - + " \"string\"," + " {" + " \"type\": \"Node\"" + " }" + " ]," - + " \"default\": null" + " }" + " ]" + " }"; + private static final String THREE_TYPE_UNION = "{" + + "\"namespace\":\"tree\",\"type\":\"record\",\"name\":\"Node\",\"fields\":[" + + "{\"name\":\"left\",\"type\":[\"null\",\"string\",\"Node\"],\"default\":null}," + + "{\"name\":\"right\",\"type\":[\"null\",\"string\",\"Node\"],\"default\":null}]}"; @Test - public void testSelfReferenceInUnion() { - Schema schema = new Schema.Parser().parse(SIMPLE_BINARY_TREE); + void selfReferenceInUnion() { + Schema schema = SchemaParser.parseSingle(SIMPLE_BINARY_TREE); Field leftField = schema.getField("left"); assertEquals(JsonProperties.NULL_VALUE, leftField.defaultVal()); final Schema leftFieldSchema = leftField.schema(); @@ -65,8 +60,8 @@ public void testSelfReferenceInUnion() { } @Test - public void testSelfReferenceInThreeUnion() { - Schema schema = new Schema.Parser().parse(THREE_TYPE_UNION); + void selfReferenceInThreeUnion() { + Schema schema = SchemaParser.parseSingle(THREE_TYPE_UNION); Field leftField = schema.getField("left"); assertEquals(JsonProperties.NULL_VALUE, leftField.defaultVal()); final Schema leftFieldSchema = leftField.schema(); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestUuidConversions.java b/lang/java/avro/src/test/java/org/apache/avro/TestUuidConversions.java new file mode 100644 index 00000000000..640bf1a2bb5 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestUuidConversions.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.apache.avro.generic.GenericFixed; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.math.BigInteger; +import java.util.UUID; +import java.util.stream.Stream; + +public class TestUuidConversions { + + private Conversions.UUIDConversion uuidConversion = new Conversions.UUIDConversion(); + + private Schema fixed = Schema.createFixed("fixed", "doc", "", Long.BYTES * 2); + private Schema fixedUuid = LogicalTypes.uuid().addToSchema(fixed); + + private Schema string = Schema.createFixed("fixed", "doc", "", Long.BYTES * 2); + private Schema stringUuid = LogicalTypes.uuid().addToSchema(string); + + @ParameterizedTest + @MethodSource("uuidData") + void uuidFixed(UUID uuid) { + GenericFixed value = uuidConversion.toFixed(uuid, fixedUuid, LogicalTypes.uuid()); + + byte[] b = new byte[Long.BYTES]; + System.arraycopy(value.bytes(), 0, b, 0, b.length); + Assertions.assertEquals(uuid.getMostSignificantBits(), new BigInteger(b).longValue()); + System.arraycopy(value.bytes(), Long.BYTES, b, 0, b.length); + Assertions.assertEquals(uuid.getLeastSignificantBits(), new BigInteger(b).longValue()); + + UUID uuid1 = uuidConversion.fromFixed(value, fixedUuid, LogicalTypes.uuid()); + Assertions.assertEquals(uuid, uuid1); + } + + @ParameterizedTest + @MethodSource("uuidData") + void uuidCharSequence(UUID uuid) { + CharSequence value = uuidConversion.toCharSequence(uuid, stringUuid, LogicalTypes.uuid()); + + Assertions.assertEquals(uuid.toString(), value.toString()); + + UUID uuid1 = uuidConversion.fromCharSequence(value, stringUuid, LogicalTypes.uuid()); + Assertions.assertEquals(uuid, uuid1); + } + + public static Stream uuidData() { + return Stream.of(Arguments.of(new UUID(Long.MIN_VALUE, Long.MAX_VALUE)), Arguments.of(new UUID(-1, 0)), + Arguments.of(UUID.randomUUID()), Arguments.of(UUID.randomUUID())); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TypeEnum.java b/lang/java/avro/src/test/java/org/apache/avro/TypeEnum.java index 0d9617bfc00..15a7ac924d5 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TypeEnum.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TypeEnum.java @@ -26,7 +26,7 @@ public enum TypeEnum implements org.apache.avro.generic.GenericEnumSymbol { a, b, c; - public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse( + public static final org.apache.avro.Schema SCHEMA$ = org.apache.avro.JsonSchemaParser.parseInternal( "{\"type\":\"enum\",\"name\":\"TypeEnum\",\"namespace\":\"org.apache.avro\",\"symbols\":[\"a\",\"b\",\"c\"]}"); public static org.apache.avro.Schema getClassSchema() { diff --git a/lang/java/avro/src/test/java/org/apache/avro/data/RecordBuilderBaseTest.java b/lang/java/avro/src/test/java/org/apache/avro/data/RecordBuilderBaseTest.java index 5f3012f869c..c2d161de50c 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/data/RecordBuilderBaseTest.java +++ b/lang/java/avro/src/test/java/org/apache/avro/data/RecordBuilderBaseTest.java @@ -17,6 +17,9 @@ */ package org.apache.avro.data; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.util.Arrays; import java.util.HashSet; import java.util.Set; @@ -24,9 +27,8 @@ import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; /** * Unit test for RecordBuilderBase. @@ -35,7 +37,7 @@ public class RecordBuilderBaseTest { private static Set primitives; private static Set nonNullPrimitives; - @BeforeClass() + @BeforeAll() public static void setUpBeforeClass() { primitives = new HashSet<>(Arrays.asList(Type.values())); primitives.removeAll(Arrays.asList(Type.RECORD, Type.ENUM, Type.ARRAY, Type.MAP, Type.UNION, Type.FIXED)); @@ -45,39 +47,39 @@ public static void setUpBeforeClass() { } @Test - public void testIsValidValueWithPrimitives() { + void isValidValueWithPrimitives() { // Verify that a non-null value is valid for all primitives: for (Type type : primitives) { Field f = new Field("f", Schema.create(type), null, null); - Assert.assertTrue(RecordBuilderBase.isValidValue(f, new Object())); + assertTrue(RecordBuilderBase.isValidValue(f, new Object())); } // Verify that null is not valid for all non-null primitives: for (Type type : nonNullPrimitives) { Field f = new Field("f", Schema.create(type), null, null); - Assert.assertFalse(RecordBuilderBase.isValidValue(f, null)); + assertFalse(RecordBuilderBase.isValidValue(f, null)); } } @Test - public void testIsValidValueWithNullField() { + void isValidValueWithNullField() { // Verify that null is a valid value for null fields: - Assert.assertTrue(RecordBuilderBase.isValidValue(new Field("f", Schema.create(Type.NULL), null, null), null)); + assertTrue(RecordBuilderBase.isValidValue(new Field("f", Schema.create(Type.NULL), null, null), null)); } @Test - public void testIsValidValueWithUnion() { + void isValidValueWithUnion() { // Verify that null values are not valid for a union with no null type: Schema unionWithoutNull = Schema .createUnion(Arrays.asList(Schema.create(Type.STRING), Schema.create(Type.BOOLEAN))); - Assert.assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithoutNull, null, null), new Object())); - Assert.assertFalse(RecordBuilderBase.isValidValue(new Field("f", unionWithoutNull, null, null), null)); + assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithoutNull, null, null), new Object())); + assertFalse(RecordBuilderBase.isValidValue(new Field("f", unionWithoutNull, null, null), null)); // Verify that null values are valid for a union with a null type: Schema unionWithNull = Schema.createUnion(Arrays.asList(Schema.create(Type.STRING), Schema.create(Type.NULL))); - Assert.assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithNull, null, null), new Object())); - Assert.assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithNull, null, null), null)); + assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithNull, null, null), new Object())); + assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithNull, null, null), null)); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/data/TestTimeConversions.java b/lang/java/avro/src/test/java/org/apache/avro/data/TestTimeConversions.java index 0cf4454d580..089915803a0 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/data/TestTimeConversions.java +++ b/lang/java/avro/src/test/java/org/apache/avro/data/TestTimeConversions.java @@ -18,6 +18,8 @@ package org.apache.avro.data; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.time.Instant; import java.time.LocalDate; import java.time.LocalTime; @@ -32,9 +34,8 @@ import org.apache.avro.data.TimeConversions.TimestampMicrosConversion; import org.apache.avro.data.TimeConversions.TimestampMillisConversion; import org.apache.avro.reflect.ReflectData; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; public class TestTimeConversions { @@ -44,7 +45,7 @@ public class TestTimeConversions { public static Schema TIMESTAMP_MILLIS_SCHEMA; public static Schema TIMESTAMP_MICROS_SCHEMA; - @BeforeClass + @BeforeAll public static void createSchemas() { TestTimeConversions.DATE_SCHEMA = LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)); TestTimeConversions.TIME_MILLIS_SCHEMA = LogicalTypes.timeMillis().addToSchema(Schema.create(Schema.Type.INT)); @@ -56,78 +57,74 @@ public static void createSchemas() { } @Test - public void testDateConversion() throws Exception { + void dateConversion() throws Exception { DateConversion conversion = new DateConversion(); LocalDate Jan_6_1970 = LocalDate.of(1970, 1, 6); // 5 LocalDate Jan_1_1970 = LocalDate.of(1970, 1, 1); // 0 LocalDate Dec_27_1969 = LocalDate.of(1969, 12, 27); // -5 - Assert.assertEquals("6 Jan 1970 should be 5", 5, - (int) conversion.toInt(Jan_6_1970, DATE_SCHEMA, LogicalTypes.date())); - Assert.assertEquals("1 Jan 1970 should be 0", 0, - (int) conversion.toInt(Jan_1_1970, DATE_SCHEMA, LogicalTypes.date())); - Assert.assertEquals("27 Dec 1969 should be -5", -5, - (int) conversion.toInt(Dec_27_1969, DATE_SCHEMA, LogicalTypes.date())); - - Assert.assertEquals("6 Jan 1970 should be 5", conversion.fromInt(5, DATE_SCHEMA, LogicalTypes.date()), Jan_6_1970); - Assert.assertEquals("1 Jan 1970 should be 0", conversion.fromInt(0, DATE_SCHEMA, LogicalTypes.date()), Jan_1_1970); - Assert.assertEquals("27 Dec 1969 should be -5", conversion.fromInt(-5, DATE_SCHEMA, LogicalTypes.date()), - Dec_27_1969); + assertEquals(5, (int) conversion.toInt(Jan_6_1970, DATE_SCHEMA, LogicalTypes.date()), "6 Jan 1970 should be 5"); + assertEquals(0, (int) conversion.toInt(Jan_1_1970, DATE_SCHEMA, LogicalTypes.date()), "1 Jan 1970 should be 0"); + assertEquals(-5, (int) conversion.toInt(Dec_27_1969, DATE_SCHEMA, LogicalTypes.date()), "27 Dec 1969 should be -5"); + + assertEquals(conversion.fromInt(5, DATE_SCHEMA, LogicalTypes.date()), Jan_6_1970, "6 Jan 1970 should be 5"); + assertEquals(conversion.fromInt(0, DATE_SCHEMA, LogicalTypes.date()), Jan_1_1970, "1 Jan 1970 should be 0"); + assertEquals(conversion.fromInt(-5, DATE_SCHEMA, LogicalTypes.date()), Dec_27_1969, "27 Dec 1969 should be -5"); } @Test - public void testTimeMillisConversion() { + void timeMillisConversion() { TimeMillisConversion conversion = new TimeMillisConversion(); LocalTime oneAM = LocalTime.of(1, 0); LocalTime afternoon = LocalTime.of(15, 14, 15, 926_000_000); int afternoonMillis = ((15 * 60 + 14) * 60 + 15) * 1000 + 926; - Assert.assertEquals("Midnight should be 0", 0, - (int) conversion.toInt(LocalTime.MIDNIGHT, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); - Assert.assertEquals("01:00 should be 3,600,000", 3_600_000, - (int) conversion.toInt(oneAM, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); - Assert.assertEquals("15:14:15.926 should be " + afternoonMillis, afternoonMillis, - (int) conversion.toInt(afternoon, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); - - Assert.assertEquals("Midnight should be 0", LocalTime.MIDNIGHT, - conversion.fromInt(0, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); - Assert.assertEquals("01:00 should be 3,600,000", oneAM, - conversion.fromInt(3600000, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); - Assert.assertEquals("15:14:15.926 should be " + afternoonMillis, afternoon, - conversion.fromInt(afternoonMillis, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); + assertEquals(0, (int) conversion.toInt(LocalTime.MIDNIGHT, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "Midnight should be 0"); + assertEquals(3_600_000, (int) conversion.toInt(oneAM, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "01:00 should be 3,600,000"); + assertEquals(afternoonMillis, (int) conversion.toInt(afternoon, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "15:14:15.926 should be " + afternoonMillis); + + assertEquals(LocalTime.MIDNIGHT, conversion.fromInt(0, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "Midnight should be 0"); + assertEquals(oneAM, conversion.fromInt(3600000, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "01:00 should be 3,600,000"); + assertEquals(afternoon, conversion.fromInt(afternoonMillis, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "15:14:15.926 should be " + afternoonMillis); } @Test - public void testTimeMicrosConversion() throws Exception { + void timeMicrosConversion() throws Exception { TimeMicrosConversion conversion = new TimeMicrosConversion(); LocalTime oneAM = LocalTime.of(1, 0); LocalTime afternoon = LocalTime.of(15, 14, 15, 926_551_000); long afternoonMicros = ((long) (15 * 60 + 14) * 60 + 15) * 1_000_000 + 926_551; - Assert.assertEquals("Midnight should be 0", LocalTime.MIDNIGHT, - conversion.fromLong(0L, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); - Assert.assertEquals("01:00 should be 3,600,000,000", oneAM, - conversion.fromLong(3_600_000_000L, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); - Assert.assertEquals("15:14:15.926551 should be " + afternoonMicros, afternoon, - conversion.fromLong(afternoonMicros, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); - - Assert.assertEquals("Midnight should be 0", 0, - (long) conversion.toLong(LocalTime.MIDNIGHT, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); - Assert.assertEquals("01:00 should be 3,600,000,000", 3_600_000_000L, - (long) conversion.toLong(oneAM, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); - Assert.assertEquals("15:14:15.926551 should be " + afternoonMicros, afternoonMicros, - (long) conversion.toLong(afternoon, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); + assertEquals(LocalTime.MIDNIGHT, conversion.fromLong(0L, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "Midnight should be 0"); + assertEquals(oneAM, conversion.fromLong(3_600_000_000L, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "01:00 should be 3,600,000,000"); + assertEquals(afternoon, conversion.fromLong(afternoonMicros, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "15:14:15.926551 should be " + afternoonMicros); + + assertEquals(0, (long) conversion.toLong(LocalTime.MIDNIGHT, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "Midnight should be 0"); + assertEquals(3_600_000_000L, (long) conversion.toLong(oneAM, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "01:00 should be 3,600,000,000"); + assertEquals(afternoonMicros, (long) conversion.toLong(afternoon, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "15:14:15.926551 should be " + afternoonMicros); } @Test - public void testTimestampMillisConversion() throws Exception { + void timestampMillisConversion() throws Exception { TimestampMillisConversion conversion = new TimestampMillisConversion(); long nowInstant = Instant.now().toEpochMilli(); // ms precision // round trip Instant now = conversion.fromLong(nowInstant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()); long roundTrip = conversion.toLong(now, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()); - Assert.assertEquals("Round-trip conversion should work", nowInstant, roundTrip); + assertEquals(nowInstant, roundTrip, "Round-trip conversion should work"); long May_28_2015_21_46_53_221_instant = 1432849613221L; Instant May_28_2015_21_46_53_221 = ZonedDateTime.of(2015, 5, 28, 21, 46, 53, 221_000_000, ZoneOffset.UTC) @@ -135,30 +132,33 @@ public void testTimestampMillisConversion() throws Exception { // known dates from https://www.epochconverter.com/ // > Epoch - Assert.assertEquals("Known date should be correct", May_28_2015_21_46_53_221, - conversion.fromLong(May_28_2015_21_46_53_221_instant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); - Assert.assertEquals("Known date should be correct", May_28_2015_21_46_53_221_instant, - (long) conversion.toLong(May_28_2015_21_46_53_221, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(May_28_2015_21_46_53_221, + conversion.fromLong(May_28_2015_21_46_53_221_instant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "Known date should be correct"); + assertEquals(May_28_2015_21_46_53_221_instant, + (long) conversion.toLong(May_28_2015_21_46_53_221, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "Known date should be correct"); // Epoch - Assert.assertEquals("1970-01-01 should be 0", Instant.EPOCH, - conversion.fromLong(0L, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); - Assert.assertEquals("1970-01-01 should be 0", 0L, - (long) conversion.toLong(ZonedDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC).toInstant(), - TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(Instant.EPOCH, conversion.fromLong(0L, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "1970-01-01 should be 0"); + assertEquals(0L, (long) conversion.toLong(ZonedDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC).toInstant(), + TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), "1970-01-01 should be 0"); // < Epoch long Jul_01_1969_12_00_00_123_instant = -15854400000L + 123; Instant Jul_01_1969_12_00_00_123 = ZonedDateTime.of(1969, 7, 1, 12, 0, 0, 123_000_000, ZoneOffset.UTC).toInstant(); - Assert.assertEquals("Pre 1970 date should be correct", Jul_01_1969_12_00_00_123, - conversion.fromLong(Jul_01_1969_12_00_00_123_instant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); - Assert.assertEquals("Pre 1970 date should be correct", Jul_01_1969_12_00_00_123_instant, - (long) conversion.toLong(Jul_01_1969_12_00_00_123, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(Jul_01_1969_12_00_00_123, + conversion.fromLong(Jul_01_1969_12_00_00_123_instant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "Pre 1970 date should be correct"); + assertEquals(Jul_01_1969_12_00_00_123_instant, + (long) conversion.toLong(Jul_01_1969_12_00_00_123, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "Pre 1970 date should be correct"); } @Test - public void testTimestampMicrosConversion() throws Exception { + void timestampMicrosConversion() throws Exception { TimestampMicrosConversion conversion = new TimestampMicrosConversion(); // known dates from https://www.epochconverter.com/ @@ -167,57 +167,58 @@ public void testTimestampMicrosConversion() throws Exception { Instant May_28_2015_21_46_53_221_843 = ZonedDateTime.of(2015, 5, 28, 21, 46, 53, 221_843_000, ZoneOffset.UTC) .toInstant(); - Assert.assertEquals("Known date should be correct", May_28_2015_21_46_53_221_843, conversion - .fromLong(May_28_2015_21_46_53_221_843_instant, TIMESTAMP_MICROS_SCHEMA, LogicalTypes.timestampMicros())); + assertEquals(May_28_2015_21_46_53_221_843, conversion.fromLong(May_28_2015_21_46_53_221_843_instant, + TIMESTAMP_MICROS_SCHEMA, LogicalTypes.timestampMicros()), "Known date should be correct"); - Assert.assertEquals("Known date should be correct", May_28_2015_21_46_53_221_843_instant, (long) conversion - .toLong(May_28_2015_21_46_53_221_843, TIMESTAMP_MICROS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(May_28_2015_21_46_53_221_843_instant, + (long) conversion.toLong(May_28_2015_21_46_53_221_843, TIMESTAMP_MICROS_SCHEMA, LogicalTypes.timestampMillis()), + "Known date should be correct"); // Epoch - Assert.assertEquals("1970-01-01 should be 0", Instant.EPOCH, - conversion.fromLong(0L, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); - Assert.assertEquals("1970-01-01 should be 0", 0L, - (long) conversion.toLong(ZonedDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC).toInstant(), - TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(Instant.EPOCH, conversion.fromLong(0L, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "1970-01-01 should be 0"); + assertEquals(0L, (long) conversion.toLong(ZonedDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC).toInstant(), + TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), "1970-01-01 should be 0"); // < Epoch long Jul_01_1969_12_00_00_000_123_instant = -15854400000L * 1000 + 123; Instant Jul_01_1969_12_00_00_000_123 = ZonedDateTime.of(1969, 7, 1, 12, 0, 0, 123_000, ZoneOffset.UTC).toInstant(); - Assert.assertEquals("Pre 1970 date should be correct", Jul_01_1969_12_00_00_000_123, conversion - .fromLong(Jul_01_1969_12_00_00_000_123_instant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); - Assert.assertEquals("Pre 1970 date should be correct", Jul_01_1969_12_00_00_000_123_instant, (long) conversion - .toLong(Jul_01_1969_12_00_00_000_123, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(Jul_01_1969_12_00_00_000_123, conversion.fromLong(Jul_01_1969_12_00_00_000_123_instant, + TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), "Pre 1970 date should be correct"); + assertEquals(Jul_01_1969_12_00_00_000_123_instant, + (long) conversion.toLong(Jul_01_1969_12_00_00_000_123, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "Pre 1970 date should be correct"); } @Test - public void testDynamicSchemaWithDateConversion() throws ClassNotFoundException { + void dynamicSchemaWithDateConversion() throws ClassNotFoundException { Schema schema = getReflectedSchemaByName("java.time.LocalDate", new TimeConversions.DateConversion()); - Assert.assertEquals("Reflected schema should be logicalType date", DATE_SCHEMA, schema); + assertEquals(DATE_SCHEMA, schema, "Reflected schema should be logicalType date"); } @Test - public void testDynamicSchemaWithTimeConversion() throws ClassNotFoundException { + void dynamicSchemaWithTimeConversion() throws ClassNotFoundException { Schema schema = getReflectedSchemaByName("java.time.LocalTime", new TimeConversions.TimeMillisConversion()); - Assert.assertEquals("Reflected schema should be logicalType timeMillis", TIME_MILLIS_SCHEMA, schema); + assertEquals(TIME_MILLIS_SCHEMA, schema, "Reflected schema should be logicalType timeMillis"); } @Test - public void testDynamicSchemaWithTimeMicrosConversion() throws ClassNotFoundException { + void dynamicSchemaWithTimeMicrosConversion() throws ClassNotFoundException { Schema schema = getReflectedSchemaByName("java.time.LocalTime", new TimeConversions.TimeMicrosConversion()); - Assert.assertEquals("Reflected schema should be logicalType timeMicros", TIME_MICROS_SCHEMA, schema); + assertEquals(TIME_MICROS_SCHEMA, schema, "Reflected schema should be logicalType timeMicros"); } @Test - public void testDynamicSchemaWithDateTimeConversion() throws ClassNotFoundException { + void dynamicSchemaWithDateTimeConversion() throws ClassNotFoundException { Schema schema = getReflectedSchemaByName("java.time.Instant", new TimeConversions.TimestampMillisConversion()); - Assert.assertEquals("Reflected schema should be logicalType timestampMillis", TIMESTAMP_MILLIS_SCHEMA, schema); + assertEquals(TIMESTAMP_MILLIS_SCHEMA, schema, "Reflected schema should be logicalType timestampMillis"); } @Test - public void testDynamicSchemaWithDateTimeMicrosConversion() throws ClassNotFoundException { + void dynamicSchemaWithDateTimeMicrosConversion() throws ClassNotFoundException { Schema schema = getReflectedSchemaByName("java.time.Instant", new TimeConversions.TimestampMicrosConversion()); - Assert.assertEquals("Reflected schema should be logicalType timestampMicros", TIMESTAMP_MICROS_SCHEMA, schema); + assertEquals(TIMESTAMP_MICROS_SCHEMA, schema, "Reflected schema should be logicalType timestampMicros"); } private Schema getReflectedSchemaByName(String className, Conversion conversion) throws ClassNotFoundException { diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java index 491a7e3f713..ef928db6f47 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java @@ -18,43 +18,27 @@ package org.apache.avro.file; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.Arrays; -import java.util.Collection; +import java.util.stream.Stream; -import static org.junit.Assert.assertTrue; - -@RunWith(Parameterized.class) public class TestAllCodecs { - @Parameterized.Parameters(name = "{index}: codec={0}") - public static Collection data() { - return Arrays.asList(new Object[][] { { "bzip2", BZip2Codec.class }, { "zstandard", ZstandardCodec.class }, - { "null", NullCodec.class }, { "xz", XZCodec.class }, { "snappy", SnappyCodec.class }, - { "deflate", DeflateCodec.class }, }); - } - - @Parameterized.Parameter(0) - public String codec; - - @Parameterized.Parameter(1) - public Class codecClass; - - @Test - public void testCodec() throws IOException { + @ParameterizedTest + @MethodSource("codecTypes") + void codec(String codec, Class codecClass) throws IOException { int inputSize = 500_000; byte[] input = generateTestData(inputSize); Codec codecInstance = CodecFactory.fromString(codec).createInstance(); - assertTrue(codecClass.isInstance(codecInstance)); - assertTrue(codecInstance.getName().equals(codec)); + Assertions.assertTrue(codecClass.isInstance(codecInstance)); + Assertions.assertTrue(codecInstance.getName().equals(codec)); ByteBuffer inputByteBuffer = ByteBuffer.wrap(input); ByteBuffer compressedBuffer = codecInstance.compress(inputByteBuffer); @@ -62,28 +46,30 @@ public void testCodec() throws IOException { int compressedSize = compressedBuffer.remaining(); // Make sure something returned - assertTrue(compressedSize > 0); + Assertions.assertTrue(compressedSize > 0); // While the compressed size could in many real cases // *increase* compared to the input size, our input data // is extremely easy to compress and all Avro's compression algorithms // should have a compression ratio greater than 1 (except 'null'). - assertTrue(compressedSize < inputSize || codec.equals("null")); + Assertions.assertTrue(compressedSize < inputSize || codec.equals("null")); // Decompress the data ByteBuffer decompressedBuffer = codecInstance.decompress(compressedBuffer); // Validate the the input and output are equal. inputByteBuffer.rewind(); - Assert.assertEquals(decompressedBuffer, inputByteBuffer); + Assertions.assertEquals(inputByteBuffer, decompressedBuffer); } - @Test - public void testCodecSlice() throws IOException { + @ParameterizedTest + @MethodSource("codecTypes") + void codecSlice(String codec, Class codecClass) throws IOException { int inputSize = 500_000; byte[] input = generateTestData(inputSize); Codec codecInstance = CodecFactory.fromString(codec).createInstance(); + Assertions.assertTrue(codecClass.isInstance(codecInstance)); ByteBuffer partialBuffer = ByteBuffer.wrap(input); partialBuffer.position(17); @@ -94,7 +80,7 @@ public void testCodecSlice() throws IOException { int compressedSize = compressedBuffer.remaining(); // Make sure something returned - assertTrue(compressedSize > 0); + Assertions.assertTrue(compressedSize > 0); // Create a slice from the compressed buffer ByteBuffer sliceBuffer = ByteBuffer.allocate(compressedSize + 100); @@ -108,7 +94,13 @@ public void testCodecSlice() throws IOException { // Validate the the input and output are equal. inputByteBuffer.rewind(); - Assert.assertEquals(decompressedBuffer, inputByteBuffer); + Assertions.assertEquals(inputByteBuffer, decompressedBuffer); + } + + public static Stream codecTypes() { + return Stream.of(Arguments.of("bzip2", BZip2Codec.class), Arguments.of("zstandard", ZstandardCodec.class), + Arguments.of("null", NullCodec.class), Arguments.of("xz", XZCodec.class), + Arguments.of("snappy", SnappyCodec.class), Arguments.of("deflate", DeflateCodec.class)); } // Generate some test data that will compress easily diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestCustomCodec.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestCustomCodec.java index a7b0ef02e8f..930b0d34a90 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestCustomCodec.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestCustomCodec.java @@ -19,22 +19,23 @@ package org.apache.avro.file; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import java.io.IOException; import java.nio.ByteBuffer; import org.apache.avro.file.codec.CustomCodec; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestCustomCodec { @Test - public void testCustomCodec() { + void customCodec() { CustomCodec customCodec = new CustomCodec(); Codec snappyCodec = new SnappyCodec.Option().createInstance(); - assertTrue(customCodec.equals(new CustomCodec())); - assertFalse(customCodec.equals(snappyCodec)); + assertEquals(customCodec, new CustomCodec()); + assertNotEquals(customCodec, snappyCodec); String testString = "Testing 123"; ByteBuffer original = ByteBuffer.allocate(testString.getBytes(UTF_8).length); diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java index eba142a5cf9..e8dc9a609c3 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java @@ -17,15 +17,16 @@ */ package org.apache.avro.file; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.io.OutputStream; import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.RandomData; -import org.junit.Test; +import org.junit.jupiter.api.Test; /* * Tests if we not write any garbage to the end of the file after any exception occurred @@ -54,10 +55,10 @@ public void write(int b) throws IOException { private static final String SCHEMA_JSON = "{\"type\": \"record\", \"name\": \"Test\", \"fields\": [" + "{\"name\":\"stringField\", \"type\":\"string\"}," + "{\"name\":\"longField\", \"type\":\"long\"}]}"; - private static final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON); + private static final Schema SCHEMA = SchemaParser.parseSingle(SCHEMA_JSON); @Test - public void testNoWritingAfterException() throws IOException { + void noWritingAfterException() throws IOException { try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { writer.create(SCHEMA, new FailingOutputStream(100000)); int recordCnt = 0; diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableByteArrayInput.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableByteArrayInput.java index 5c8b3a8ddb5..2e6b46e5d1f 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableByteArrayInput.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableByteArrayInput.java @@ -17,10 +17,6 @@ */ package org.apache.avro.file; -import java.io.ByteArrayOutputStream; -import java.util.ArrayList; -import java.util.List; - import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; @@ -30,8 +26,19 @@ import org.apache.avro.generic.IndexedRecord; import org.apache.avro.specific.SpecificDatumWriter; import org.apache.avro.util.Utf8; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; public class TestSeekableByteArrayInput { @@ -53,7 +60,7 @@ private Schema getTestSchema() throws Exception { } @Test - public void testSerialization() throws Exception { + void serialization() throws Exception { Schema testSchema = getTestSchema(); GenericRecord message = new Record(testSchema); message.put("name", "testValue"); @@ -66,8 +73,35 @@ public void testSerialization() throws Exception { FileReader dfr = DataFileReader.openReader(in, reader)) { result = dfr.next(); } - Assert.assertNotNull(result); - Assert.assertTrue(result instanceof GenericRecord); - Assert.assertEquals(new Utf8("testValue"), ((GenericRecord) result).get("name")); + assertNotNull(result); + assertInstanceOf(GenericRecord.class, result); + assertEquals(new Utf8("testValue"), ((GenericRecord) result).get("name")); + } + + @Test + void readingData() throws IOException { + byte[] data = "0123456789ABCD".getBytes(StandardCharsets.UTF_8); + byte[] result = new byte[16]; + try (SeekableInput in = new SeekableByteArrayInput(data)) { + in.read(result, 0, 8); + in.seek(4); + in.read(result, 8, 8); + assertEquals(12, in.tell()); + assertEquals(data.length, in.length()); + assertEquals("01234567456789AB", new String(result, StandardCharsets.UTF_8)); + } + } + + @Test + void illegalSeeks() throws IOException { + byte[] data = "0123456789ABCD".getBytes(StandardCharsets.UTF_8); + try (SeekableInput in = new SeekableByteArrayInput(data)) { + byte[] buf = new byte[2]; + in.read(buf, 0, buf.length); + in.seek(-4); + assertEquals(2, in.tell()); + + assertThrows(EOFException.class, () -> in.seek(64)); + } } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableInputStream.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableInputStream.java new file mode 100644 index 00000000000..34dbf298215 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableInputStream.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.file; + +import org.junit.Assert; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestSeekableInputStream { + @Test + void readingData() throws IOException { + byte[] data = "0123456789ABCD".getBytes(StandardCharsets.UTF_8); + try (DataFileReader.SeekableInputStream sin = new DataFileReader.SeekableInputStream( + new SeekableByteArrayInput(data))) { + byte[] first8 = new byte[8]; + assertEquals(first8.length, sin.read(first8, 0, 8)); + assertArrayEquals("01234567".getBytes(StandardCharsets.UTF_8), first8); + sin.seek(4); + assertEquals(10, sin.available()); + assertEquals(2, sin.skip(2)); + assertEquals((byte) '6', sin.read()); + byte[] next4 = new byte[4]; + assertEquals(next4.length, sin.read(next4)); + assertArrayEquals("789A".getBytes(StandardCharsets.UTF_8), next4); + assertEquals(11, sin.tell()); + assertEquals(data.length, sin.length()); + } + } + + @Test + void illegalSeek() throws IOException { + try (SeekableInput in = new SeekableByteArrayInput("".getBytes(StandardCharsets.UTF_8)); + DataFileReader.SeekableInputStream sin = new DataFileReader.SeekableInputStream(in)) { + Assert.assertThrows(IOException.class, () -> sin.seek(-5)); + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java index aa002608342..7242996fae0 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java @@ -17,19 +17,20 @@ */ package org.apache.avro.file; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestZstandardCodec { @Test - public void testZstandardToStringAndName() throws IOException { + void zstandardToStringAndName() throws IOException { Codec codec = CodecFactory.zstandardCodec(3).createInstance(); assertTrue(codec instanceof ZstandardCodec); - assertTrue(codec.getName().equals("zstandard")); - assertTrue(codec.toString().equals("zstandard[3]")); + assertEquals(codec.getName(), "zstandard"); + assertEquals(codec.toString(), "zstandard[3]"); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataArrayTest.java b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataArrayTest.java new file mode 100644 index 00000000000..a4ffebac02d --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataArrayTest.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.generic; + +import org.apache.avro.Schema; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class GenericDataArrayTest { + + @Test + void test() { + GenericData.Array array = new GenericData.Array<>(10, + Schema.createArray(Schema.create(Schema.Type.STRING))); + array.add("One"); + array.add("Two"); + array.add("Two"); + array.add("Three"); + array.add(4, "Four"); + array.remove(1); + Assertions.assertEquals(4, array.size()); + Assertions.assertEquals("One", array.get(0)); + Assertions.assertEquals("Two", array.get(1)); + Assertions.assertEquals("Three", array.get(2)); + Assertions.assertEquals("Four", array.get(3)); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataTest.java b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataTest.java new file mode 100644 index 00000000000..040a71e2ea0 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataTest.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.generic; + +import org.apache.avro.Conversion; +import org.apache.avro.LogicalType; +import org.apache.avro.Schema; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumMap; +import java.util.List; +import java.util.Map; + +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.*; + +class GenericDataTest { + + static Schema createSchema(Schema.Type type) { + switch (type) { + case FIXED: + return Schema.createFixed("foo", null, null, 4); + case UNION: + return Schema.createUnion(Schema.create(Schema.Type.FLOAT), Schema.create(Schema.Type.STRING)); + case MAP: + return Schema.createMap(Schema.create(Schema.Type.FLOAT)); + case ARRAY: + return Schema.createArray(Schema.create(Schema.Type.STRING)); + case RECORD: + return Schema.createRecord("record", null, null, false); + case ENUM: + return Schema.createEnum("myEnum", null, null, Collections.emptyList()); + default: + return Schema.create(type); + } + } + + static Object sampleValue(Schema schema) { + if (schema.getLogicalType() != null) { + return new Object(); + } + switch (schema.getElementType().getType()) { + case BOOLEAN: + return true; + case INT: + return Integer.MAX_VALUE; + case LONG: + return Long.MAX_VALUE; + case FLOAT: + return Float.MAX_VALUE; + case DOUBLE: + return Double.MAX_VALUE; + default: + return "foo"; + } + } + + static Schema createArraySchema(Schema.Type type) { + return Schema.createArray(createSchema(type)); + } + + static Schema createArraySchemaWithLogicalType(Schema.Type type) { + final LogicalType logicalType = new LogicalType("Mike"); + Schema schema = logicalType.addToSchema(createSchema(type)); + return Schema.createArray(schema); + } + + static Map> validMappings = new EnumMap<>(Schema.Type.class); + static { + for (Schema.Type type : Schema.Type.values()) { + switch (type) { + case INT: + validMappings.put(type, new PrimitivesArrays.IntArray(0, createArraySchema(type))); + break; + case LONG: + validMappings.put(type, new PrimitivesArrays.LongArray(0, createArraySchema(type))); + break; + case DOUBLE: + validMappings.put(type, new PrimitivesArrays.DoubleArray(0, createArraySchema(type))); + break; + case FLOAT: + validMappings.put(type, new PrimitivesArrays.FloatArray(0, createArraySchema(type))); + break; + case BOOLEAN: + validMappings.put(type, new PrimitivesArrays.BooleanArray(0, createArraySchema(type))); + break; + default: + validMappings.put(type, new GenericData.Array<>(0, createArraySchema(type))); + break; + } + } + } + + public static Stream testNewArrayData() { + + List data = new ArrayList<>(); + + validMappings.forEach((validKey, optimalValue) -> { + Class optimalValueType = optimalValue.getClass(); + // cant reuse null, or a string + final Schema arraySchema = createArraySchema(validKey); + + data.add(Arguments.of("null input, " + validKey, arraySchema, Collections.emptyList(), null, optimalValueType)); + data.add( + Arguments.of("String input, " + validKey, arraySchema, Collections.emptyList(), "foo", optimalValueType)); + // should reuse arraylist & generic array + data.add(Arguments.of("ArrayList input, " + validKey, arraySchema, Collections.emptyList(), new ArrayList<>(), + ArrayList.class)); + data.add(Arguments.of("Generic input, " + validKey, arraySchema, Collections.emptyList(), + new GenericData.Array(0, arraySchema), GenericData.Array.class)); + // with logical type + if (validKey != Schema.Type.UNION) { + data.add(Arguments.of("null (with logical type) input, " + validKey, createArraySchemaWithLogicalType(validKey), + Collections.emptyList(), null, GenericData.Array.class)); + data.add(Arguments.of("String (with logical type) input, " + validKey, + createArraySchemaWithLogicalType(validKey), Collections.emptyList(), "foo", GenericData.Array.class)); + data.add(Arguments.of("ArrayList (with logical type) input, " + validKey, arraySchema, Collections.emptyList(), + new ArrayList<>(), ArrayList.class)); + data.add(Arguments.of("Generic (with logical type) input, " + validKey, arraySchema, Collections.emptyList(), + new GenericData.Array(0, arraySchema), GenericData.Array.class)); +// with logical type and conversion + + validMappings.forEach((targetKey, targetType) -> { + if (targetKey != Schema.Type.UNION) { + data.add(Arguments.of("null (with logical type) input, " + validKey + " convert to " + targetType, + createArraySchemaWithLogicalType(targetKey), singleConversion(targetKey), null, targetType.getClass())); + data.add(Arguments.of("String (with logical type) input, " + validKey + " convert to " + targetType, + createArraySchemaWithLogicalType(targetKey), singleConversion(targetKey), "foo", + targetType.getClass())); + data.add(Arguments.of("ArrayList (with logical type) input, " + validKey + " convert to " + targetType, + createArraySchemaWithLogicalType(targetKey), singleConversion(targetKey), new ArrayList<>(), + ArrayList.class)); + data.add(Arguments.of("Generic (with logical type) input, " + validKey, arraySchema, + Collections.emptyList(), new GenericData.Array(0, arraySchema), GenericData.Array.class)); + } + }); + + } + + validMappings.forEach((suppliedValueType, suppliedValue) -> { + data.add(Arguments.of(suppliedValueType + " input " + validKey, arraySchema, Collections.emptyList(), + suppliedValue, optimalValueType)); + if (validKey != Schema.Type.UNION) + data.add(Arguments.of(suppliedValueType + " (with logical type) input " + validKey, + createArraySchemaWithLogicalType(validKey), Collections.emptyList(), suppliedValue, + GenericData.Array.class)); + }); + }); + return data.stream(); + } + + private static List> singleConversion(Schema.Type targetKey) { + return Collections.singletonList(new Conversion() { + + @Override + public Class getConvertedType() { + switch (targetKey) { + case INT: + return (Class) Integer.TYPE; + case LONG: + return (Class) Long.TYPE; + case DOUBLE: + return (Class) Double.TYPE; + case FLOAT: + return (Class) Float.TYPE; + case BOOLEAN: + return (Class) Boolean.TYPE; + default: + return (Class) Object.class; + } + + } + + @Override + public String getLogicalTypeName() { + return "Mike"; + } + + }); + } + + @ParameterizedTest + @MethodSource("testNewArrayData") + void testNewArray(String description, Schema schema, List> convertions, Object initial, + Class> expectedType) { + GenericData underTest = new GenericData(); + convertions.forEach(underTest::addLogicalTypeConversion); + + Object result = underTest.newArray(initial, 10, schema); + // never null + assertNotNull(result, description); + // should always be the best fit type, or a generic array + assertTrue(expectedType.isInstance(result) || result instanceof GenericData.Array, + result.getClass() + " when expected generic or " + expectedType.getName() + " - " + description); + + // must be a collection from the above list + Collection resultCollection = (Collection) result; + + // the result should be empty + assertEquals(0, resultCollection.size(), "not empty - " + description); + + // is the supplied type matched the return type, then we should not have + // allocated a new object + if (initial != null && initial.getClass() == result.getClass()) { + // if the result type is the same as the initial type, it should be reused, so + // we should not have allocated a new object + assertSame(initial, result, "not reused - " + description); + } + + // is the supplied type matched the return type, then we should not have + // allocated a new object + if (initial == null) { + // if we did allocate a not object, we should have allocated the optimal type + assertSame(expectedType, result.getClass(), "not optimal - " + description); + } + // check the schema was set correctly + if (result instanceof GenericContainer && result != initial) { + GenericContainer resultArray = (GenericContainer) result; + assertEquals(schema.getElementType(), resultArray.getSchema().getElementType(), + "wrong element type - " + description); + } + + // for primitive arrays, we should not have a logical type, and the underlying + // array should be the correct type + if (result instanceof PrimitivesArrays.PrimitiveArray) { + assertSame(expectedType, resultCollection.getClass(), "wrong type for primitive - " + description); + } + + final Object sample = sampleValue(schema); + resultCollection.add(sample); + assertEquals(1, resultCollection.size(), "not added - " + description); + assertEquals(sample, resultCollection.iterator().next(), "wrong value - " + description); + assertEquals(1, resultCollection.size(), "disappeared - " + description); + + Object result2 = underTest.newArray(resultCollection, 10, schema); + assertSame(result, result2, "not reused - " + description); + + assertEquals(0, resultCollection.size(), "not reset - " + description); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/PrimitivesArraysTest.java b/lang/java/avro/src/test/java/org/apache/avro/generic/PrimitivesArraysTest.java new file mode 100644 index 00000000000..10435045587 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/PrimitivesArraysTest.java @@ -0,0 +1,307 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.generic; + +import org.apache.avro.Schema; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class PrimitivesArraysTest { + + @Test + void booleanArray() { + PrimitivesArrays.BooleanArray ba = new PrimitivesArrays.BooleanArray(4, + Schema.createArray(Schema.create(Schema.Type.BOOLEAN))); + + Assertions.assertEquals(0, ba.size()); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0) { + ba.add(true); + } else { + ba.add(false); + } + } + Assertions.assertEquals(99, ba.size()); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0) { + Assertions.assertTrue(ba.get(i - 1), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "Error for " + i); + } + } + Assertions.assertFalse(ba.remove(12)); + Assertions.assertEquals(98, ba.size()); + for (int i = 13; i < 99; i++) { + if ((i + 1) % 3 == 0 || (i + 1) % 5 == 0) { + Assertions.assertTrue(ba.get(i - 1), "After delete, Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "After delete, Error for " + i); + } + } + + ba.add(12, false); + Assertions.assertEquals(99, ba.size()); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0) { + Assertions.assertTrue(ba.get(i - 1), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "Error for " + i); + } + } + Assertions.assertFalse(ba.remove(12)); + ba.add(12, true); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0 || i == 13) { + Assertions.assertTrue(ba.get(i - 1), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "Error for " + i); + } + } + ba.add(99, true); + Assertions.assertTrue(ba.get(99), "Error for 99"); + ba.remove(99); + ba.reverse(); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0 || i == 13) { + Assertions.assertTrue(ba.get(99 - i), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(99 - i), "Error for " + i); + } + } + } + + @Test + void booleanArrayIterator() { + PrimitivesArrays.BooleanArray ba = new PrimitivesArrays.BooleanArray(4, + Schema.createArray(Schema.create(Schema.Type.BOOLEAN))); + boolean[] model = new boolean[] { true, false, false, true, true, true, false, false, true, false, false }; + for (boolean x : model) { + ba.add(x); + } + Assertions.assertEquals(model.length, ba.size()); + int index = 0; + for (Boolean b : ba) { + Assertions.assertEquals(model[index], b); + index++; + } + } + + @Test + void intArray() { + final PrimitivesArrays.IntArray intArray = new PrimitivesArrays.IntArray(4, + Schema.createArray(Schema.create(Schema.Type.INT))); + for (int i = 1; i <= 100; i++) { + intArray.add(i); + } + Assertions.assertEquals(100, intArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, intArray.get(i - 1)); + } + + int expectedValue = 1; + for (Integer value : intArray) { + Assertions.assertEquals(expectedValue, value); + expectedValue++; + } + + intArray.remove(40); + Assertions.assertEquals(99, intArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i, intArray.get(i - 1)); + } else { + Assertions.assertEquals(i + 1, intArray.get(i - 1)); + } + } + intArray.add(40, 41); + Assertions.assertEquals(100, intArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, intArray.get(i - 1)); + } + intArray.set(40, 25); + Assertions.assertEquals(25, intArray.get(40)); + + Assertions.assertEquals(0, intArray.peek()); + intArray.set(40, 41); + intArray.reverse(); + Assertions.assertEquals(100, intArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(101 - i, intArray.get(i - 1)); + } + } + + @Test + void longArray() { + final PrimitivesArrays.LongArray longArray = new PrimitivesArrays.LongArray(4, + Schema.createArray(Schema.create(Schema.Type.LONG))); + for (long i = 1; i <= 100; i++) { + longArray.add(i); + } + Assertions.assertEquals(100l, longArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, longArray.get(i - 1)); + } + + int expectedValue = 1; + for (Long value : longArray) { + Assertions.assertEquals(expectedValue, value); + expectedValue++; + } + + longArray.remove(40); + Assertions.assertEquals(99, longArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i, longArray.get(i - 1)); + } else { + Assertions.assertEquals(i + 1, longArray.get(i - 1)); + } + } + longArray.add(40, 41); + Assertions.assertEquals(100, longArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, longArray.get(i - 1)); + } + longArray.set(40, 25); + Assertions.assertEquals(25, longArray.get(40)); + + Assertions.assertEquals(0, longArray.peek()); + longArray.set(40, 41); + longArray.reverse(); + Assertions.assertEquals(100, longArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(101 - i, longArray.get(i - 1)); + } + } + + @Test + void floatArray() { + final PrimitivesArrays.FloatArray floatArray = new PrimitivesArrays.FloatArray(4, + Schema.createArray(Schema.create(Schema.Type.FLOAT))); + for (int i = 1; i <= 100; i++) { + floatArray.add(i * 3.3f); + } + Assertions.assertEquals(100, floatArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.3f, floatArray.get(i - 1)); + } + + float expectedValue = 1.0f; + for (Float value : floatArray) { + Assertions.assertEquals(expectedValue * 3.3f, value); + expectedValue++; + } + + floatArray.remove(40); + Assertions.assertEquals(99, floatArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i * 3.3f, floatArray.get(i - 1)); + } else { + Assertions.assertEquals((i + 1) * 3.3f, floatArray.get(i - 1)); + } + } + floatArray.add(40, 41 * 3.3f); + Assertions.assertEquals(100, floatArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.3f, floatArray.get(i - 1)); + } + floatArray.set(40, 25.2f); + Assertions.assertEquals(25.2f, floatArray.get(40)); + + Assertions.assertEquals(0.0f, floatArray.peek()); + floatArray.set(40, 41 * 3.3f); + floatArray.reverse(); + Assertions.assertEquals(100, floatArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals((101 - i) * 3.3f, floatArray.get(i - 1)); + } + } + + @Test + void doubleArray() { + final PrimitivesArrays.DoubleArray doubleArray = new PrimitivesArrays.DoubleArray(4, + Schema.createArray(Schema.create(Schema.Type.DOUBLE))); + for (int i = 1; i <= 100; i++) { + doubleArray.add(i * 3.0d); + } + Assertions.assertEquals(100, doubleArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.0d, doubleArray.get(i - 1)); + } + + double expectedValue = 1.0f; + for (Double value : doubleArray) { + Assertions.assertEquals(expectedValue * 3.0d, value); + expectedValue++; + } + + doubleArray.remove(40); + Assertions.assertEquals(99, doubleArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i * 3.0d, doubleArray.get(i - 1)); + } else { + Assertions.assertEquals((i + 1) * 3.0d, doubleArray.get(i - 1)); + } + } + doubleArray.add(40, 41 * 3.0d); + Assertions.assertEquals(100, doubleArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.0d, doubleArray.get(i - 1)); + } + doubleArray.set(40, 25.2d); + Assertions.assertEquals(25.2d, doubleArray.get(40)); + + Assertions.assertEquals(0.0d, doubleArray.peek()); + doubleArray.set(40, 41 * 3.0d); + doubleArray.reverse(); + Assertions.assertEquals(100, doubleArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals((101 - i) * 3.0d, doubleArray.get(i - 1)); + } + + doubleArray.add(Double.MAX_VALUE); + doubleArray.add(Double.MIN_VALUE); + Assertions.assertEquals(102, doubleArray.size()); + Assertions.assertEquals(Double.MAX_VALUE, doubleArray.get(100)); + Assertions.assertEquals(Double.MIN_VALUE, doubleArray.get(101)); + + // Flip the positions to make sure they still work + doubleArray.set(101, Double.MAX_VALUE); + doubleArray.set(100, Double.MIN_VALUE); + Assertions.assertEquals(102, doubleArray.size()); + Assertions.assertEquals(Double.MAX_VALUE, doubleArray.get(101)); + Assertions.assertEquals(Double.MIN_VALUE, doubleArray.get(100)); + } + + @Test + void testDoubleArrayPreservesPrecisionForNonFloatRepresentableValues() { + final PrimitivesArrays.DoubleArray doubleArray = new PrimitivesArrays.DoubleArray(1, + Schema.createArray(Schema.create(Schema.Type.DOUBLE))); + + // This value cannot be represented as a float + Double nonFloatDouble = .9; + Assertions.assertNotEquals(.9, nonFloatDouble.floatValue()); + + // Assert that the double array does not lose precision when adding + doubleArray.add(nonFloatDouble); + Assertions.assertEquals(nonFloatDouble, doubleArray.get(0)); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericConcreteEnum.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericConcreteEnum.java index c01e32fdfb9..bf56d0ca4e1 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericConcreteEnum.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericConcreteEnum.java @@ -24,14 +24,13 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.specific.SpecificDatumReader; -import org.junit.Test; - +import org.junit.jupiter.api.Test; import java.io.ByteArrayOutputStream; + +import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.IOException; import java.util.Collections; -import static org.junit.Assert.assertEquals; - /** * See AVRO-1810: GenericDatumWriter broken with Enum */ @@ -47,7 +46,7 @@ private static byte[] serializeRecord(FooBarSpecificRecord fooBarSpecificRecord) } @Test - public void testGenericWriteAndRead() throws IOException { + void genericWriteAndRead() throws IOException { FooBarSpecificRecord specificRecord = getRecord(); byte[] bytes = serializeRecord(specificRecord); @@ -62,7 +61,7 @@ public void testGenericWriteAndRead() throws IOException { } @Test - public void testGenericWriteSpecificRead() throws IOException { + void genericWriteSpecificRead() throws IOException { FooBarSpecificRecord specificRecord = getRecord(); byte[] bytes = serializeRecord(specificRecord); diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java index c8690ea9265..3a563725ef5 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java @@ -19,12 +19,7 @@ import static org.apache.avro.TestCircularReferences.Reference; import static org.apache.avro.TestCircularReferences.Referenceable; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.*; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParseException; @@ -45,72 +40,91 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.UUID; + import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; import org.apache.avro.SchemaBuilder; +import org.apache.avro.SchemaParser; import org.apache.avro.TestCircularReferences.ReferenceManager; import org.apache.avro.generic.GenericData.Record; import org.apache.avro.io.BinaryData; import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestGenericData { - @Test(expected = AvroRuntimeException.class) - public void testrecordConstructorNullSchema() throws Exception { - new GenericData.Record(null); + @Test + void recordConstructorNullSchema() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + new GenericData.Record(null); + }); } - @Test(expected = AvroRuntimeException.class) - public void testrecordConstructorWrongSchema() throws Exception { - new GenericData.Record(Schema.create(Schema.Type.INT)); + @Test + void recordConstructorWrongSchema() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + new GenericData.Record(Schema.create(Schema.Type.INT)); + }); } - @Test(expected = AvroRuntimeException.class) - public void testArrayConstructorNullSchema() throws Exception { - new GenericData.Array<>(1, null); + @Test + void arrayConstructorNullSchema() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + new GenericData.Array<>(1, null); + }); } - @Test(expected = AvroRuntimeException.class) - public void testArrayConstructorWrongSchema() throws Exception { - new GenericData.Array<>(1, Schema.create(Schema.Type.INT)); + @Test + void arrayConstructorWrongSchema() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + new GenericData.Array<>(1, Schema.create(Schema.Type.INT)); + }); } - @Test(expected = AvroRuntimeException.class) - public void testRecordCreateEmptySchema() throws Exception { - Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); - new GenericData.Record(s); + @Test + void recordCreateEmptySchema() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); + new GenericData.Record(s); + }); } - @Test(expected = AvroRuntimeException.class) - public void testGetEmptySchemaFields() throws Exception { - Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); - s.getFields(); + @Test + void getEmptySchemaFields() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); + s.getFields(); + }); } - @Test(expected = AvroRuntimeException.class) - public void testGetEmptySchemaField() throws Exception { - Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); - s.getField("foo"); + @Test + void getEmptySchemaField() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); + s.getField("foo"); + }); } - @Test(expected = AvroRuntimeException.class) - public void testRecordPutInvalidField() throws Exception { - Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); - List fields = new ArrayList<>(); - fields.add(new Schema.Field("someFieldName", s, "docs", null)); - s.setFields(fields); - Record r = new GenericData.Record(s); - r.put("invalidFieldName", "someValue"); + @Test + void recordPutInvalidField() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); + List fields = new ArrayList<>(); + fields.add(new Schema.Field("someFieldName", s, "docs", null)); + s.setFields(fields); + Record r = new GenericData.Record(s); + r.put("invalidFieldName", "someValue"); + }); } - @Test /** Make sure that even with nulls, hashCode() doesn't throw NPE. */ - public void testHashCode() { + @Test + void testHashCode() { GenericData.get().hashCode(null, Schema.create(Type.NULL)); GenericData.get().hashCode(null, Schema.createUnion(Arrays.asList(Schema.create(Type.BOOLEAN), Schema.create(Type.STRING)))); @@ -123,7 +137,7 @@ public void testHashCode() { } @Test - public void testEquals() { + void testEquals() { Schema s = recordSchema(); GenericRecord r0 = new GenericData.Record(s); GenericRecord r1 = new GenericData.Record(s); @@ -143,6 +157,168 @@ public void testEquals() { assertEquals(r1, r2); } + @Test + public void testMapKeyEqualsStringAndUtf8Compatibility() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + GenericRecord r0 = new GenericData.Record(schema); + GenericRecord r1 = new GenericData.Record(schema); + + HashMap pair1 = new HashMap<>(); + pair1.put("keyOne", "valueOne"); + r0.put("my_map", pair1); + + HashMap pair2 = new HashMap<>(); + pair2.put(new Utf8("keyOne"), "valueOne"); + r1.put("my_map", pair2); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + + @Test + public void testMapValuesEqualsStringAndUtf8Compatibility() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + GenericRecord r0 = new GenericData.Record(schema); + GenericRecord r1 = new GenericData.Record(schema); + + HashMap pair1 = new HashMap<>(); + pair1.put("keyOne", "valueOne"); + r0.put("my_map", pair1); + + HashMap pair2 = new HashMap<>(); + pair2.put("keyOne", new Utf8("valueOne")); + r1.put("my_map", pair2); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + + @Test + public void testEqualsEmptyMaps() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + r0.put("my_map", new HashMap<>()); + GenericRecord r1 = new GenericData.Record(schema); + r1.put("my_map", new HashMap<>()); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + + @Test + public void testEqualsEmptyMapAndNonEmptyMap() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + r0.put("my_map", new HashMap<>()); + GenericRecord r1 = new GenericData.Record(schema); + HashMap pair1 = new HashMap<>(); + pair1.put("keyOne", "valueOne"); + r1.put("my_map", pair1); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSubset() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + m1.put("keyTwo", "valueTwo"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap m2 = new HashMap<>(); + m2.put("keyOne", "valueOne"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSameSizeMapWithDifferentKeys() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap m2 = new HashMap<>(); + m2.put("keyTwo", "valueTwo"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSameSizeMapWithDifferentValues() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap m2 = new HashMap<>(); + m2.put("keyOne", "valueTwo"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testArrayValuesEqualsStringAndUtf8Compatibility() { + Field myArrayField = new Field("my_array", Schema.createArray(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myArrayField)); + GenericRecord r0 = new GenericData.Record(schema); + GenericRecord r1 = new GenericData.Record(schema); + + List array1 = Arrays.asList("valueOne"); + r0.put("my_array", array1); + + List array2 = Arrays.asList(new Utf8("valueOne")); + r1.put("my_array", array2); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + + // AVRO-4139 + @Test + public void testEqualsMapInArray() { + Schema schema = Schema.createArray(Schema.createMap(Schema.create(Schema.Type.STRING))); + GenericData.Array> a1 = new GenericData.Array<>(10, schema); + GenericData.Array> a2 = new GenericData.Array<>(10, schema); + a1.add(Map.of("a", "b")); + a2.add(Map.of("a", "b")); + assertEquals(a1, a2); + } + private Schema recordSchema() { List fields = new ArrayList<>(); fields.add(new Field("anArray", Schema.createArray(Schema.create(Type.STRING)), null, null)); @@ -153,7 +329,7 @@ private Schema recordSchema() { } @Test - public void testEquals2() { + void equals2() { Schema schema1 = Schema.createRecord("r", null, "x", false); List fields1 = new ArrayList<>(); fields1.add(new Field("a", Schema.create(Schema.Type.STRING), null, null, Field.Order.IGNORE)); @@ -171,19 +347,21 @@ public void testEquals2() { GenericRecord record2 = new GenericData.Record(schema2); record2.put("a", "2"); - assertFalse(record2.equals(record1)); - assertFalse(record1.equals(record2)); + assertNotEquals(record2, record1); + assertNotEquals(record1, record2); } - @Test(expected = AvroRuntimeException.class) - public void testRecordGetFieldDoesntExist() throws Exception { - Schema schema = Schema.createRecord("test", "doc", "test", false, Collections.EMPTY_LIST); - GenericData.Record record = new GenericData.Record(schema); - record.get("does not exist"); + @Test + void recordGetFieldDoesntExist() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + Schema schema = Schema.createRecord("test", "doc", "test", false, Collections.EMPTY_LIST); + GenericData.Record record = new GenericData.Record(schema); + record.get("does not exist"); + }); } @Test - public void testArrayReversal() { + void arrayReversal() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray forward = new GenericData.Array<>(10, schema); GenericArray backward = new GenericData.Array<>(10, schema); @@ -194,11 +372,11 @@ public void testArrayReversal() { backward.add(i); } forward.reverse(); - assertTrue(forward.equals(backward)); + assertEquals(forward, backward); } @Test - public void testArrayListInterface() { + void arrayListInterface() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(1, schema); array.add(99); @@ -224,7 +402,7 @@ public void testArrayListInterface() { } @Test - public void testArrayAddAtLocation() { + void arrayAddAtLocation() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(6, schema); array.clear(); @@ -254,7 +432,7 @@ public void testArrayAddAtLocation() { } @Test - public void testArrayRemove() { + void arrayRemove() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(10, schema); array.clear(); @@ -297,7 +475,7 @@ public void testArrayRemove() { } @Test - public void testArraySet() { + void arraySet() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(10, schema); array.clear(); @@ -313,7 +491,7 @@ public void testArraySet() { } @Test - public void testToStringIsJson() throws JsonParseException, IOException { + void toStringIsJson() throws JsonParseException, IOException { Field stringField = new Field("string", Schema.create(Type.STRING), null, null); Field enumField = new Field("enum", Schema.createEnum("my_enum", "doc", null, Arrays.asList("a", "b", "c")), null, null); @@ -335,18 +513,18 @@ public void testToStringIsJson() throws JsonParseException, IOException { } @Test - public void testMapWithNonStringKeyToStringIsJson() throws Exception { - Schema intMapSchema = new Schema.Parser() - .parse("{\"type\": \"map\", \"values\": \"string\", \"java-key-class\" : \"java.lang.Integer\"}"); + void mapWithNonStringKeyToStringIsJson() throws Exception { + Schema intMapSchema = SchemaParser + .parseSingle("{\"type\": \"map\", \"values\": \"string\", \"java-key-class\" : \"java.lang.Integer\"}"); Field intMapField = new Field("intMap", Schema.createMap(intMapSchema), null, null); - Schema decMapSchema = new Schema.Parser() - .parse("{\"type\": \"map\", \"values\": \"string\", \"java-key-class\" : \"java.math.BigDecimal\"}"); + Schema decMapSchema = SchemaParser + .parseSingle("{\"type\": \"map\", \"values\": \"string\", \"java-key-class\" : \"java.math.BigDecimal\"}"); Field decMapField = new Field("decMap", Schema.createMap(decMapSchema), null, null); - Schema boolMapSchema = new Schema.Parser() - .parse("{\"type\": \"map\", \"values\": \"string\", \"java-key-class\" : \"java.lang.Boolean\"}"); + Schema boolMapSchema = SchemaParser + .parseSingle("{\"type\": \"map\", \"values\": \"string\", \"java-key-class\" : \"java.lang.Boolean\"}"); Field boolMapField = new Field("boolMap", Schema.createMap(boolMapSchema), null, null); - Schema fileMapSchema = new Schema.Parser() - .parse("{\"type\": \"map\", \"values\": \"string\", \"java-key-class\" : \"java.io.File\"}"); + Schema fileMapSchema = SchemaParser + .parseSingle("{\"type\": \"map\", \"values\": \"string\", \"java-key-class\" : \"java.io.File\"}"); Field fileMapField = new Field("fileMap", Schema.createMap(fileMapSchema), null, null); Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); schema.setFields(Arrays.asList(intMapField, decMapField, boolMapField, fileMapField)); @@ -384,7 +562,7 @@ public void testMapWithNonStringKeyToStringIsJson() throws Exception { } @Test - public void testToStringEscapesControlCharsInBytes() throws Exception { + void toStringEscapesControlCharsInBytes() throws Exception { GenericData data = GenericData.get(); ByteBuffer bytes = ByteBuffer.wrap(new byte[] { 'a', '\n', 'b' }); assertEquals("\"a\\nb\"", data.toString(bytes)); @@ -392,7 +570,7 @@ public void testToStringEscapesControlCharsInBytes() throws Exception { } @Test - public void testToStringEscapesControlCharsInMap() { + void toStringEscapesControlCharsInMap() { GenericData data = GenericData.get(); Map m = new HashMap<>(); m.put("a\n\\b", "a\n\\b"); @@ -400,20 +578,20 @@ public void testToStringEscapesControlCharsInMap() { } @Test - public void testToStringFixed() throws Exception { + void toStringFixed() throws Exception { GenericData data = GenericData.get(); assertEquals("[97, 10, 98]", data.toString(new GenericData.Fixed(Schema.createFixed("test", null, null, 3), new byte[] { 'a', '\n', 'b' }))); } @Test - public void testToStringDoesNotEscapeForwardSlash() throws Exception { + void toStringDoesNotEscapeForwardSlash() throws Exception { GenericData data = GenericData.get(); assertEquals("\"/\"", data.toString("/")); } @Test - public void testToStringNanInfinity() throws Exception { + void toStringNanInfinity() throws Exception { GenericData data = GenericData.get(); assertEquals("\"Infinity\"", data.toString(Float.POSITIVE_INFINITY)); assertEquals("\"-Infinity\"", data.toString(Float.NEGATIVE_INFINITY)); @@ -424,7 +602,7 @@ public void testToStringNanInfinity() throws Exception { } @Test - public void testToStringConvertsDatesAsStrings() throws Exception { + void toStringConvertsDatesAsStrings() throws Exception { GenericData data = GenericData.get(); assertEquals("\"1961-04-12T06:07:10Z\"", data.toString(Instant.parse("1961-04-12T06:07:10Z"))); assertEquals("\"1961-04-12\"", data.toString(LocalDate.parse("1961-04-12"))); @@ -433,7 +611,14 @@ public void testToStringConvertsDatesAsStrings() throws Exception { } @Test - public void testCompare() { + void ToStringConvertsUuidsAsStrings() throws Exception { + GenericData data = GenericData.get(); + assertEquals("\"abf2f1e8-cece-4fdc-290a-babaca09ec74\"", + data.toString(UUID.fromString("abf2f1e8-cece-4fdc-290a-babaca09ec74"))); + } + + @Test + void compare() { // Prepare a schema for testing. Field integerField = new Field("test", Schema.create(Type.INT), null, null); List fields = new ArrayList<>(); @@ -475,7 +660,7 @@ public void testCompare() { } @Test - public void testEnumCompare() { + void enumCompare() { Schema s = Schema.createEnum("Kind", null, null, Arrays.asList("Z", "Y", "X")); GenericEnumSymbol z = new GenericData.EnumSymbol(s, "Z"); GenericEnumSymbol z2 = new GenericData.EnumSymbol(s, "Z"); @@ -486,7 +671,7 @@ public void testEnumCompare() { } @Test - public void testByteBufferDeepCopy() { + void byteBufferDeepCopy() { // Test that a deep copy of a byte buffer respects the byte buffer // limits and capacity. byte[] buffer_value = { 0, 1, 2, 3, 0, 0, 0 }; @@ -505,7 +690,7 @@ public void testByteBufferDeepCopy() { } @Test - public void testValidateNullableEnum() { + void validateNullableEnum() { List unionTypes = new ArrayList<>(); Schema schema; Schema nullSchema = Schema.create(Type.NULL); @@ -542,10 +727,10 @@ public void testValidateNullableEnum() { private enum anEnum { ONE, TWO, THREE - }; + } @Test - public void validateRequiresGenericSymbolForEnumSchema() { + void validateRequiresGenericSymbolForEnumSchema() { final Schema schema = Schema.createEnum("my_enum", "doc", "namespace", Arrays.asList("ONE", "TWO", "THREE")); final GenericData gd = GenericData.get(); @@ -554,12 +739,12 @@ public void validateRequiresGenericSymbolForEnumSchema() { assertTrue(gd.validate(schema, new GenericData.EnumSymbol(schema, anEnum.ONE))); /* negative cases */ - assertFalse("We don't expect GenericData to allow a String datum for an enum schema", gd.validate(schema, "ONE")); - assertFalse("We don't expect GenericData to allow a Java Enum for an enum schema", gd.validate(schema, anEnum.ONE)); + assertFalse(gd.validate(schema, "ONE"), "We don't expect GenericData to allow a String datum for an enum schema"); + assertFalse(gd.validate(schema, anEnum.ONE), "We don't expect GenericData to allow a Java Enum for an enum schema"); } @Test - public void testValidateUnion() { + void validateUnion() { Schema type1Schema = SchemaBuilder.record("Type1").fields().requiredString("myString").requiredInt("myInt") .endRecord(); @@ -578,7 +763,7 @@ public void testValidateUnion() { * Record, Map and Array this is correct, for the rest is is not. */ @Test - public void testToStringSameValues() throws IOException { + void toStringSameValues() throws IOException { List fields = new ArrayList<>(); fields.add(new Field("nullstring1", Schema.create(Type.STRING), null, null)); fields.add(new Field("nullstring2", Schema.create(Type.STRING), null, null)); @@ -667,14 +852,14 @@ public void testToStringSameValues() throws IOException { testRecord.put("map2", map); String testString = testRecord.toString(); - assertFalse("Record with duplicated values results in wrong 'toString()'", - testString.contains("CIRCULAR REFERENCE")); + assertFalse(testString.contains("CIRCULAR REFERENCE"), + "Record with duplicated values results in wrong 'toString()'"); } // Test copied from Apache Parquet: // org.apache.parquet.avro.TestCircularReferences @Test - public void testToStringRecursive() throws IOException { + void toStringRecursive() throws IOException { ReferenceManager manager = new ReferenceManager(); GenericData model = new GenericData(); model.addLogicalTypeConversion(manager.getTracker()); @@ -725,12 +910,12 @@ public void testToStringRecursive() throws IOException { } } - @Test /** * check that GenericArray.reset() retains reusable elements and that * GenericArray.prune() cleans them up properly. */ - public void testGenericArrayPeek() { + @Test + void genericArrayPeek() { Schema elementSchema = SchemaBuilder.record("element").fields().requiredString("value").endRecord(); Schema arraySchema = Schema.createArray(elementSchema); diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumReader.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumReader.java new file mode 100644 index 00000000000..5586b828999 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumReader.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.generic; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Random; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.apache.avro.Schema; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.junit.jupiter.api.Test; + +public class TestGenericDatumReader { + + private static final Random r = new Random(System.currentTimeMillis()); + + @Test + void readerCache() { + final GenericDatumReader.ReaderCache cache = new GenericDatumReader.ReaderCache(this::findStringClass); + List threads = IntStream.rangeClosed(1, 200).mapToObj((int index) -> { + final Schema schema = TestGenericDatumReader.this.build(index); + final WithSchema s = new WithSchema(schema, cache); + return (Runnable) () -> s.test(); + }).map(Thread::new).collect(Collectors.toList()); + threads.forEach(Thread::start); + threads.forEach((Thread t) -> { + try { + t.join(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + } + + @Test + void newInstanceFromString() { + final GenericDatumReader.ReaderCache cache = new GenericDatumReader.ReaderCache(this::findStringClass); + + Object object = cache.newInstanceFromString(StringBuilder.class, "Hello"); + assertEquals(StringBuilder.class, object.getClass()); + StringBuilder builder = (StringBuilder) object; + assertEquals("Hello", builder.toString()); + + } + + static class WithSchema { + private final Schema schema; + + private final GenericDatumReader.ReaderCache cache; + + public WithSchema(Schema schema, GenericDatumReader.ReaderCache cache) { + this.schema = schema; + this.cache = cache; + } + + public void test() { + this.cache.getStringClass(schema); + } + } + + private List list = new ArrayList<>(); + + private Schema build(int index) { + int schemaNum = (index - 1) % 50; + if (index <= 50) { + Schema schema = Schema.createRecord("record_" + schemaNum, "doc", "namespace", false, + Arrays.asList(new Schema.Field("field" + schemaNum, Schema.create(Schema.Type.STRING)))); + list.add(schema); + } + + return list.get(schemaNum); + } + + private Class findStringClass(Schema schema) { + this.sleep(); + if (schema.getType() == Schema.Type.INT) { + return Integer.class; + } + if (schema.getType() == Schema.Type.STRING) { + return String.class; + } + if (schema.getType() == Schema.Type.LONG) { + return Long.class; + } + if (schema.getType() == Schema.Type.FLOAT) { + return Float.class; + } + return String.class; + } + + private void sleep() { + long timeToSleep = r.nextInt(30) + 10L; + if (timeToSleep > 25) { + try { + Thread.sleep(timeToSleep); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + + // --- minBytesPerElement tests --- + + @Test + void testMinBytesPerElementPrimitives() { + assertEquals(0, GenericDatumReader.minBytesPerElement(Schema.create(Schema.Type.NULL))); + assertEquals(1, GenericDatumReader.minBytesPerElement(Schema.create(Schema.Type.BOOLEAN))); + assertEquals(1, GenericDatumReader.minBytesPerElement(Schema.create(Schema.Type.INT))); + assertEquals(1, GenericDatumReader.minBytesPerElement(Schema.create(Schema.Type.LONG))); + assertEquals(4, GenericDatumReader.minBytesPerElement(Schema.create(Schema.Type.FLOAT))); + assertEquals(8, GenericDatumReader.minBytesPerElement(Schema.create(Schema.Type.DOUBLE))); + assertEquals(1, GenericDatumReader.minBytesPerElement(Schema.create(Schema.Type.STRING))); + assertEquals(1, GenericDatumReader.minBytesPerElement(Schema.create(Schema.Type.BYTES))); + } + + @Test + void testMinBytesPerElementFixed() { + assertEquals(0, GenericDatumReader.minBytesPerElement(Schema.createFixed("ZeroFixed", null, "test", 0))); + assertEquals(5, GenericDatumReader.minBytesPerElement(Schema.createFixed("FiveFixed", null, "test", 5))); + assertEquals(16, GenericDatumReader.minBytesPerElement(Schema.createFixed("SixteenFixed", null, "test", 16))); + } + + @Test + void testMinBytesPerElementUnion() { + // Union always >= 1 byte (branch index varint) + Schema nullableInt = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.INT)); + assertEquals(1, GenericDatumReader.minBytesPerElement(nullableInt)); + } + + @Test + void testMinBytesPerElementRecord() { + // Empty record = 0 bytes + Schema emptyRecord = Schema.createRecord("Empty", null, "test", false); + emptyRecord.setFields(Collections.emptyList()); + assertEquals(0, GenericDatumReader.minBytesPerElement(emptyRecord)); + + // Record with a single non-null field >= 1 byte + Schema recWithInt = Schema.createRecord("WithInt", null, "test", false); + recWithInt.setFields(Collections.singletonList(new Schema.Field("x", Schema.create(Schema.Type.INT)))); + assertEquals(1, GenericDatumReader.minBytesPerElement(recWithInt)); + + // Record with only null fields = 0 bytes + Schema recWithNull = Schema.createRecord("WithNull", null, "test", false); + recWithNull.setFields(Collections.singletonList(new Schema.Field("n", Schema.create(Schema.Type.NULL)))); + assertEquals(0, GenericDatumReader.minBytesPerElement(recWithNull)); + + Schema recWithMultipleFields = Schema.createRecord("WithMultipleFields", null, "test", false); + recWithMultipleFields.setFields(Arrays.asList(new Schema.Field("f", Schema.create(Schema.Type.FLOAT)), + new Schema.Field("d", Schema.create(Schema.Type.DOUBLE)))); + assertEquals(12, GenericDatumReader.minBytesPerElement(recWithMultipleFields)); + } + + @Test + void testMinBytesPerElementNestedCollections() { + // Array and map types are >= 1 byte (count varint) + assertEquals(1, GenericDatumReader.minBytesPerElement(Schema.createArray(Schema.create(Schema.Type.INT)))); + assertEquals(1, GenericDatumReader.minBytesPerElement(Schema.createMap(Schema.create(Schema.Type.INT)))); + } + + // --- Collection byte validation end-to-end tests --- + + /** + * Encodes the given longs as Avro varints into a byte array. + */ + private static byte[] encodeVarints(long... values) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryEncoder enc = EncoderFactory.get().directBinaryEncoder(baos, null); + for (long v : values) { + enc.writeLong(v); + } + enc.flush(); + return baos.toByteArray(); + } + + /** + * Verify that reading an array of ints with a huge count but no element data + * throws EOFException from the schema-aware byte check. + */ + @Test + void arrayOfIntsRejectsHugeCount() throws Exception { + Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); + GenericDatumReader reader = new GenericDatumReader<>(schema); + + // Binary: varint(10_000_000) for block count, varint(0) for terminator. + // No actual element data -- the reader should reject before allocating. + byte[] data = encodeVarints(10_000_000L, 0L); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(data, null); + assertThrows(EOFException.class, () -> reader.read(null, decoder)); + } + + /** + * Verify that reading an array of nulls with a large count SUCCEEDS because + * null elements are 0 bytes each, so the byte check is correctly skipped. + */ + @Test + void arrayOfNullsAcceptsLargeCount() throws Exception { + Schema schema = Schema.createArray(Schema.create(Schema.Type.NULL)); + GenericDatumReader reader = new GenericDatumReader<>(schema); + + // Binary: varint(1000) for block count, varint(0) for terminator. + // 1000 null elements = 0 bytes of element data. + byte[] data = encodeVarints(1000L, 0L); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(data, null); + GenericData.Array result = (GenericData.Array) reader.read(null, decoder); + assertEquals(1000, result.size()); + } + + /** + * Verify that reading a map of string->int with a huge count throws + * EOFException. Each map entry needs at least 2 bytes (1 for key length varint + * + 1 for int value). + */ + @Test + void mapOfStringToIntRejectsHugeCount() throws Exception { + Schema schema = Schema.createMap(Schema.create(Schema.Type.INT)); + GenericDatumReader reader = new GenericDatumReader<>(schema); + + byte[] data = encodeVarints(10_000_000L, 0L); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(data, null); + assertThrows(EOFException.class, () -> reader.read(null, decoder)); + } + + /** + * Verify that reading a map of string->null with a huge count also throws + * EOFException because map keys are always strings (at least 1 byte each). + */ + @Test + void mapOfStringToNullRejectsHugeCount() throws Exception { + Schema schema = Schema.createMap(Schema.create(Schema.Type.NULL)); + GenericDatumReader reader = new GenericDatumReader<>(schema); + + byte[] data = encodeVarints(10_000_000L, 0L); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(data, null); + assertThrows(EOFException.class, () -> reader.read(null, decoder)); + } + + /** + * Verify that reading an array of zero-length fixed elements with a large count + * SUCCEEDS because zero-length fixed elements are 0 bytes each. + */ + @Test + void arrayOfZeroLengthFixedAcceptsLargeCount() throws Exception { + Schema fixedSchema = Schema.createFixed("Empty", null, "test", 0); + Schema schema = Schema.createArray(fixedSchema); + GenericDatumReader reader = new GenericDatumReader<>(schema); + + byte[] data = encodeVarints(500L, 0L); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(data, null); + GenericData.Array result = (GenericData.Array) reader.read(null, decoder); + assertEquals(500, result.size()); + } + + @Test + void arrayOfRecordsRejectsHugeCountUsingFullRecordSize() throws Exception { + Schema recordSchema = Schema.createRecord("Element", null, "test", false); + recordSchema.setFields(Arrays.asList(new Schema.Field("f", Schema.create(Schema.Type.FLOAT)), + new Schema.Field("d", Schema.create(Schema.Type.DOUBLE)))); + Schema schema = Schema.createArray(recordSchema); + GenericDatumReader reader = new GenericDatumReader<>(schema); + + byte[] data = encodeVarints(2L, 0L); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(data, null); + assertThrows(EOFException.class, () -> reader.read(null, decoder)); + } + + @Test + void mapOfRecordsRejectsHugeCountUsingFullRecordSize() throws Exception { + Schema recordSchema = Schema.createRecord("MapValue", null, "test", false); + recordSchema.setFields(Arrays.asList(new Schema.Field("f", Schema.create(Schema.Type.FLOAT)), + new Schema.Field("d", Schema.create(Schema.Type.DOUBLE)))); + Schema schema = Schema.createMap(recordSchema); + GenericDatumReader reader = new GenericDatumReader<>(schema); + + byte[] data = encodeVarints(1L, 0L); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(data, null); + assertThrows(EOFException.class, () -> reader.read(null, decoder)); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumWriter.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumWriter.java index 2d5bf202d7f..8ed6bca856d 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumWriter.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumWriter.java @@ -17,14 +17,14 @@ */ package org.apache.avro.generic; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.Collections; import java.util.ConcurrentModificationException; import java.util.HashMap; @@ -37,35 +37,35 @@ import java.util.concurrent.Future; import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; import org.apache.avro.UnresolvedUnionException; import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestGenericDatumWriter { @Test - public void testUnionUnresolvedExceptionExplicitWhichField() throws IOException { + void unionUnresolvedExceptionExplicitWhichField() throws IOException { Schema s = schemaWithExplicitNullDefault(); GenericRecord r = new GenericData.Record(s); r.put("f", 100); ByteArrayOutputStream bao = new ByteArrayOutputStream(); - EncoderFactory.get().jsonEncoder(s, bao); try { new GenericDatumWriter<>(s).write(r, EncoderFactory.get().jsonEncoder(s, bao)); fail(); } catch (final UnresolvedUnionException uue) { - assertEquals("Not in union [\"null\",\"string\"]: 100 (field=f)", uue.getMessage()); + assertEquals("Not in union [\"null\",\"string\"]: java.lang.Integer (field=f)", uue.getMessage()); } } @Test - public void testWrite() throws IOException { + void write() throws IOException { String json = "{\"type\": \"record\", \"name\": \"r\", \"fields\": [" + "{ \"name\": \"f1\", \"type\": \"long\" }" + "]}"; - Schema s = new Schema.Parser().parse(json); + Schema s = SchemaParser.parseSingle(json); GenericRecord r = new GenericData.Record(s); r.put("f1", 100L); ByteArrayOutputStream bao = new ByteArrayOutputStream(); @@ -80,9 +80,9 @@ public void testWrite() throws IOException { } @Test - public void testArrayConcurrentModification() throws Exception { + void arrayConcurrentModification() throws Exception { String json = "{\"type\": \"array\", \"items\": \"int\" }"; - Schema s = new Schema.Parser().parse(json); + Schema s = SchemaParser.parseSingle(json); final GenericArray a = new GenericData.Array<>(1, s); ByteArrayOutputStream bao = new ByteArrayOutputStream(); final GenericDatumWriter> w = new GenericDatumWriter<>(s); @@ -113,9 +113,9 @@ public void testArrayConcurrentModification() throws Exception { } @Test - public void testMapConcurrentModification() throws Exception { + void mapConcurrentModification() throws Exception { String json = "{\"type\": \"map\", \"values\": \"int\" }"; - Schema s = new Schema.Parser().parse(json); + Schema s = SchemaParser.parseSingle(json); final Map m = new HashMap<>(); ByteArrayOutputStream bao = new ByteArrayOutputStream(); final GenericDatumWriter> w = new GenericDatumWriter<>(s); @@ -146,7 +146,7 @@ public void testMapConcurrentModification() throws Exception { } @Test - public void testAllowWritingPrimitives() throws IOException { + void allowWritingPrimitives() throws IOException { Schema doubleType = Schema.create(Schema.Type.DOUBLE); Schema.Field field = new Schema.Field("double", doubleType); List fields = Collections.singletonList(field); @@ -282,75 +282,219 @@ public void writeMapEnd() throws IOException { public void writeIndex(int unionIndex) throws IOException { e.writeIndex(unionIndex); } - }; - - @Test(expected = AvroTypeException.class) - public void writeDoesNotAllowStringForGenericEnum() throws IOException { - final String json = "{\"type\": \"record\", \"name\": \"recordWithEnum\"," + "\"fields\": [ " - + "{\"name\": \"field\", \"type\": " + "{\"type\": \"enum\", \"name\": \"enum\", \"symbols\": " - + "[\"ONE\",\"TWO\",\"THREE\"] " + "}" + "}" + "]}"; - Schema schema = new Schema.Parser().parse(json); - GenericRecord record = new GenericData.Record(schema); - record.put("field", "ONE"); - - ByteArrayOutputStream bao = new ByteArrayOutputStream(); - GenericDatumWriter writer = new GenericDatumWriter<>(schema); - Encoder encoder = EncoderFactory.get().jsonEncoder(schema, bao); + } - writer.write(record, encoder); + @Test + void writeDoesNotAllowStringForGenericEnum() throws IOException { + assertThrows(AvroTypeException.class, () -> { + final String json = "{\"type\": \"record\", \"name\": \"recordWithEnum\"," + "\"fields\": [ " + + "{\"name\": \"field\", \"type\": " + "{\"type\": \"enum\", \"name\": \"enum\", \"symbols\": " + + "[\"ONE\",\"TWO\",\"THREE\"] " + "}" + "}" + "]}"; + Schema schema = SchemaParser.parseSingle(json); + GenericRecord record = new GenericData.Record(schema); + record.put("field", "ONE"); + + ByteArrayOutputStream bao = new ByteArrayOutputStream(); + GenericDatumWriter writer = new GenericDatumWriter<>(schema); + Encoder encoder = EncoderFactory.get().jsonEncoder(schema, bao); + + writer.write(record, encoder); + }); } private enum AnEnum { ONE, TWO, THREE - }; - - @Test(expected = AvroTypeException.class) - public void writeDoesNotAllowJavaEnumForGenericEnum() throws IOException { - final String json = "{\"type\": \"record\", \"name\": \"recordWithEnum\"," + "\"fields\": [ " - + "{\"name\": \"field\", \"type\": " + "{\"type\": \"enum\", \"name\": \"enum\", \"symbols\": " - + "[\"ONE\",\"TWO\",\"THREE\"] " + "}" + "}" + "]}"; - Schema schema = new Schema.Parser().parse(json); - GenericRecord record = new GenericData.Record(schema); - record.put("field", AnEnum.ONE); - - ByteArrayOutputStream bao = new ByteArrayOutputStream(); - GenericDatumWriter writer = new GenericDatumWriter<>(schema); - Encoder encoder = EncoderFactory.get().jsonEncoder(schema, bao); + } - writer.write(record, encoder); + @Test + void writeDoesNotAllowJavaEnumForGenericEnum() throws IOException { + assertThrows(AvroTypeException.class, () -> { + final String json = "{\"type\": \"record\", \"name\": \"recordWithEnum\"," + "\"fields\": [ " + + "{\"name\": \"field\", \"type\": " + "{\"type\": \"enum\", \"name\": \"enum\", \"symbols\": " + + "[\"ONE\",\"TWO\",\"THREE\"] " + "}" + "}" + "]}"; + Schema schema = SchemaParser.parseSingle(json); + GenericRecord record = new GenericData.Record(schema); + record.put("field", AnEnum.ONE); + + ByteArrayOutputStream bao = new ByteArrayOutputStream(); + GenericDatumWriter writer = new GenericDatumWriter<>(schema); + Encoder encoder = EncoderFactory.get().jsonEncoder(schema, bao); + + writer.write(record, encoder); + }); } @Test - public void writeFieldWithDefaultWithExplicitNullDefaultInSchema() throws Exception { + void writeFieldWithDefaultWithExplicitNullDefaultInSchema() throws Exception { Schema schema = schemaWithExplicitNullDefault(); GenericRecord record = createRecordWithDefaultField(schema); - writeObject(schema, record); + writeObject(record); } @Test - public void writeFieldWithDefaultWithoutExplicitNullDefaultInSchema() throws Exception { + void writeFieldWithDefaultWithoutExplicitNullDefaultInSchema() throws Exception { Schema schema = schemaWithoutExplicitNullDefault(); GenericRecord record = createRecordWithDefaultField(schema); - writeObject(schema, record); + writeObject(record); + } + + @Test + void nestedNPEErrorClarity() throws Exception { + GenericData.Record topLevelRecord = buildComplexRecord(); + @SuppressWarnings("unchecked") + Map map = (Map) ((List) ((GenericData.Record) topLevelRecord + .get("unionField")).get("arrayField")).get(0).get("mapField"); + map.get("a").put("strField", null); + try { + writeObject(topLevelRecord); + fail("expected to throw"); + } catch (NullPointerException expected) { + assertTrue( + expected.getMessage() + .contains("RecordWithRequiredFields.unionField[UnionRecord].arrayField[0].mapField[\"a\"].strField"), + "unexpected message " + expected.getMessage()); + } + } + + @Test + void nPEForMapKeyErrorClarity() throws Exception { + GenericData.Record topLevelRecord = buildComplexRecord(); + @SuppressWarnings("unchecked") + Map map = (Map) ((List) ((GenericData.Record) topLevelRecord + .get("unionField")).get("arrayField")).get(0).get("mapField"); + map.put(null, map.get("a")); // value is valid, but key is null + try { + writeObject(topLevelRecord); + fail("expected to throw"); + } catch (NullPointerException expected) { + assertTrue( + expected.getMessage() + .contains("null key in map at RecordWithRequiredFields.unionField[UnionRecord].arrayField[0].mapField"), + "unexpected message " + expected.getMessage()); + } + } + + @Test + void shortPathNPEErrorClarity() throws Exception { + try { + writeObject(Schema.create(Schema.Type.STRING), null); + fail("expected to throw"); + } catch (NullPointerException expected) { + assertTrue(expected.getMessage().contains("null value for (non-nullable) string"), + "unexpected message " + expected.getMessage()); + } + } + + @Test + void nestedCCEErrorClarity() throws Exception { + GenericData.Record topLevelRecord = buildComplexRecord(); + @SuppressWarnings("unchecked") + Map map = (Map) ((List) ((GenericData.Record) topLevelRecord + .get("unionField")).get("arrayField")).get(0).get("mapField"); + map.get("a").put("strField", 42); // not a string + try { + writeObject(topLevelRecord); + fail("expected to throw"); + } catch (ClassCastException expected) { + assertTrue( + expected.getMessage() + .contains("RecordWithRequiredFields.unionField[UnionRecord].arrayField[0].mapField[\"a\"].strField"), + "unexpected message " + expected.getMessage()); + } + } + + @Test + void shortPathCCEErrorClarity() throws Exception { + try { + writeObject(Schema.create(Schema.Type.STRING), 42); + fail("expected to throw"); + } catch (ClassCastException expected) { + assertTrue( + expected.getMessage().contains("value 42 (a java.lang.Integer) cannot be cast to expected type string"), + "unexpected message " + expected.getMessage()); + } + } + + @Test + void nestedATEErrorClarity() throws Exception { + GenericData.Record topLevelRecord = buildComplexRecord(); + @SuppressWarnings("unchecked") + Map map = (Map) ((List) ((GenericData.Record) topLevelRecord + .get("unionField")).get("arrayField")).get(0).get("mapField"); + map.get("a").put("enumField", 42); // not an enum + try { + writeObject(topLevelRecord); + fail("expected to throw"); + } catch (AvroTypeException expected) { + assertTrue( + expected.getMessage() + .contains("RecordWithRequiredFields.unionField[UnionRecord].arrayField[0].mapField[\"a\"].enumField"), + "unexpected message " + expected.getMessage()); + assertTrue(expected.getMessage().contains("42 (a java.lang.Integer) is not a MapRecordEnum"), + "unexpected message " + expected.getMessage()); + } + } + + private GenericData.Record buildComplexRecord() throws IOException { + + Schema schema = new SchemaParser() + .parse(new File("target/test-classes/share/test/schemas/RecordWithRequiredFields.avsc")).mainSchema(); + + GenericData.Record topLevelRecord = new GenericData.Record(schema); + GenericData.Record unionRecord = new GenericData.Record(schema.getField("unionField").schema().getTypes().get(1)); + Schema arraySchema = unionRecord.getSchema().getField("arrayField").schema(); + GenericData.Record arrayRecord1 = new GenericData.Record(arraySchema.getElementType()); + GenericData.Record arrayRecord2 = new GenericData.Record(arraySchema.getElementType()); + GenericData.Array array = new GenericData.Array<>(arraySchema, + Arrays.asList(arrayRecord1, arrayRecord2)); + Schema mapRecordSchema = arraySchema.getElementType().getField("mapField").schema().getValueType(); + GenericData.Record mapRecordA = new GenericData.Record(mapRecordSchema); + Schema mapRecordEnumSchema = mapRecordSchema.getField("enumField").schema(); + + mapRecordA.put("enumField", new GenericData.EnumSymbol(mapRecordEnumSchema, "B")); + mapRecordA.put("strField", "4"); + + arrayRecord1.put("strField", "2"); + HashMap map1 = new HashMap<>(); + map1.put("a", mapRecordA); + arrayRecord1.put("mapField", map1); + + arrayRecord2.put("strField", "2"); + HashMap map2 = new HashMap<>(); + map2.put("a", mapRecordA); + arrayRecord2.put("mapField", map2); + + unionRecord.put(unionRecord.getSchema().getField("strField").pos(), "1"); + unionRecord.put(unionRecord.getSchema().getField("arrayField").pos(), array); // BOOM + + topLevelRecord.put(topLevelRecord.getSchema().getField("strField").pos(), "0"); + topLevelRecord.put(topLevelRecord.getSchema().getField("unionField").pos(), unionRecord); + + return topLevelRecord; } private Schema schemaWithExplicitNullDefault() { String schema = "{\"type\":\"record\",\"name\":\"my_record\",\"namespace\":\"mytest.namespace\",\"doc\":\"doc\"," + "\"fields\":[{\"name\":\"f\",\"type\":[\"null\",\"string\"],\"doc\":\"field doc doc\", " + "\"default\":null}]}"; - return new Schema.Parser().parse(schema); + return SchemaParser.parseSingle(schema); } private Schema schemaWithoutExplicitNullDefault() { String schema = "{\"type\":\"record\",\"name\":\"my_record\",\"namespace\":\"mytest.namespace\",\"doc\":\"doc\"," + "\"fields\":[{\"name\":\"f\",\"type\":[\"null\",\"string\"],\"doc\":\"field doc doc\"}]}"; - return new Schema.Parser().parse(schema); + return SchemaParser.parseSingle(schema); + } + + private void writeObject(GenericRecord datum) throws Exception { + writeObject(datum.getSchema(), datum); } - private void writeObject(Schema schema, GenericRecord datum) throws Exception { + private void writeObject(Schema schema, Object datum) throws Exception { BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(new ByteArrayOutputStream(), null); - GenericDatumWriter writer = new GenericDatumWriter<>(schema); - writer.write(schema, datum, encoder); + GenericDatumWriter writer = new GenericDatumWriter<>(schema); + writer.write(datum, encoder); + encoder.flush(); } private GenericRecord createRecordWithDefaultField(Schema schema) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java index 4da31ea5a8f..6df4a8af6a2 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java @@ -18,13 +18,28 @@ package org.apache.avro.generic; -import static org.hamcrest.Matchers.is; -import static org.junit.Assert.assertThat; +import org.apache.avro.Conversion; +import org.apache.avro.Conversions; +import org.apache.avro.CustomType; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.data.TimeConversions; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.file.FileReader; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.util.TimePeriod; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import java.io.File; import java.io.IOException; import java.math.BigDecimal; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneOffset; @@ -32,41 +47,32 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Random; import java.util.UUID; -import org.apache.avro.Conversion; -import org.apache.avro.Conversions; -import org.apache.avro.LogicalType; -import org.apache.avro.LogicalTypes; -import org.apache.avro.Schema; -import org.apache.avro.data.TimeConversions; -import org.apache.avro.file.DataFileReader; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.file.FileReader; -import org.apache.avro.io.DatumReader; -import org.apache.avro.io.DatumWriter; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.is; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; public class TestGenericLogicalTypes { - @Rule - public TemporaryFolder temp = new TemporaryFolder(); + @TempDir + public File temp; public static final GenericData GENERIC = new GenericData(); - @BeforeClass + @BeforeAll public static void addLogicalTypes() { GENERIC.addLogicalTypeConversion(new Conversions.DecimalConversion()); GENERIC.addLogicalTypeConversion(new Conversions.UUIDConversion()); + GENERIC.addLogicalTypeConversion(new Conversions.DurationConversion()); GENERIC.addLogicalTypeConversion(new TimeConversions.LocalTimestampMicrosConversion()); GENERIC.addLogicalTypeConversion(new TimeConversions.LocalTimestampMillisConversion()); } @Test - public void testReadUUID() throws IOException { + public void readUUID() throws IOException { Schema uuidSchema = Schema.create(Schema.Type.STRING); LogicalTypes.uuid().addToSchema(uuidSchema); @@ -75,11 +81,11 @@ public void testReadUUID() throws IOException { List expected = Arrays.asList(u1, u2); File test = write(Schema.create(Schema.Type.STRING), u1.toString(), u2.toString()); - Assert.assertEquals("Should convert Strings to UUIDs", expected, read(GENERIC.createDatumReader(uuidSchema), test)); + assertEquals(expected, read(GENERIC.createDatumReader(uuidSchema), test), "Should convert Strings to UUIDs"); } @Test - public void testWriteUUID() throws IOException { + public void writeUUID() throws IOException { Schema stringSchema = Schema.create(Schema.Type.STRING); stringSchema.addProp(GenericData.STRING_PROP, "String"); Schema uuidSchema = Schema.create(Schema.Type.STRING); @@ -90,12 +96,12 @@ public void testWriteUUID() throws IOException { List expected = Arrays.asList(u1.toString(), u2.toString()); File test = write(GENERIC, uuidSchema, u1, u2); - Assert.assertEquals("Should read UUIDs as Strings", expected, - read(GenericData.get().createDatumReader(stringSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(stringSchema), test), + "Should read UUIDs as Strings"); } @Test - public void testWriteNullableUUID() throws IOException { + public void writeNullableUUID() throws IOException { Schema stringSchema = Schema.create(Schema.Type.STRING); stringSchema.addProp(GenericData.STRING_PROP, "String"); Schema nullableStringSchema = Schema.createUnion(Schema.create(Schema.Type.NULL), stringSchema); @@ -109,12 +115,43 @@ public void testWriteNullableUUID() throws IOException { List expected = Arrays.asList(u1.toString(), u2.toString()); File test = write(GENERIC, nullableUuidSchema, u1, u2); - Assert.assertEquals("Should read UUIDs as Strings", expected, - read(GenericData.get().createDatumReader(nullableStringSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(nullableStringSchema), test), + "Should read UUIDs as Strings"); + } + + @Test + public void readWriteDuration() throws IOException { + Schema fixedSchema = Schema.createFixed("bare.Fixed", null, null, 12); + + Schema durationSchema = Schema.createFixed("time.Duration", null, null, 12); + LogicalTypes.duration().addToSchema(durationSchema); + + // These two are necessary for schema evolution! + fixedSchema.addAlias(durationSchema.getFullName()); + durationSchema.addAlias(fixedSchema.getFullName()); + + Random rng = new Random(); + TimePeriod d1 = TimePeriod.of(rng.nextInt(1000), rng.nextInt(1000), rng.nextInt(1000)); + ByteBuffer b1 = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN).putInt((int) d1.getMonths()) + .putInt((int) d1.getDays()).putInt((int) d1.getMillis()); + GenericFixed f1 = new GenericData.Fixed(fixedSchema, b1.array()); + + TimePeriod d2 = TimePeriod.of(rng.nextInt(1000), rng.nextInt(1000), rng.nextInt(1000)); + ByteBuffer b2 = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN).putInt((int) d2.getMonths()) + .putInt((int) d2.getDays()).putInt((int) d2.getMillis()); + GenericFixed f2 = new GenericData.Fixed(fixedSchema, b2.array()); + + File test = write(fixedSchema, f1, f2); + assertEquals(Arrays.asList(d1, d2), read(GENERIC.createDatumReader(durationSchema), test), + "Should convert fixed bytes to durations"); + + test = write(GENERIC, durationSchema, d2, d1); + assertEquals(Arrays.asList(f2, f1), read(GenericData.get().createDatumReader(fixedSchema), test), + "Should convert durations to fixed bytes"); } @Test - public void testReadDecimalFixed() throws IOException { + public void readDecimalFixed() throws IOException { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4); Schema decimalSchema = decimal.addToSchema(Schema.createFixed("aFixed", null, null, 4)); @@ -130,12 +167,11 @@ public void testReadDecimalFixed() throws IOException { GenericFixed d2fixed = conversion.toFixed(d2, fixedSchema, decimal); File test = write(fixedSchema, d1fixed, d2fixed); - Assert.assertEquals("Should convert fixed to BigDecimals", expected, - read(GENERIC.createDatumReader(decimalSchema), test)); + assertEquals(expected, read(GENERIC.createDatumReader(decimalSchema), test), "Should convert fixed to BigDecimals"); } @Test - public void testWriteDecimalFixed() throws IOException { + public void writeDecimalFixed() throws IOException { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4); Schema decimalSchema = decimal.addToSchema(Schema.createFixed("aFixed", null, null, 4)); @@ -150,16 +186,16 @@ public void testWriteDecimalFixed() throws IOException { List expected = Arrays.asList(d1fixed, d2fixed); File test = write(GENERIC, decimalSchema, d1, d2); - Assert.assertEquals("Should read BigDecimals as fixed", expected, - read(GenericData.get().createDatumReader(fixedSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(fixedSchema), test), + "Should read BigDecimals as fixed"); } @Test - public void testDecimalToFromBytes() throws IOException { + public void decimalToFromBytes() { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema bytesSchema = Schema.create(Schema.Type.BYTES); - // Check that the round trip to and from bytes + // Check the round trip to and from bytes BigDecimal d1 = new BigDecimal("-34.34"); BigDecimal d2 = new BigDecimal("117230.00"); @@ -176,11 +212,11 @@ public void testDecimalToFromBytes() throws IOException { } @Test - public void testDecimalToFromFixed() throws IOException { + public void decimalToFromFixed() { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4); - // Check that the round trip to and from fixed data. + // Check the round trip to and from fixed data. BigDecimal d1 = new BigDecimal("-34.34"); BigDecimal d2 = new BigDecimal("117230.00"); @@ -193,7 +229,7 @@ public void testDecimalToFromFixed() throws IOException { } @Test - public void testReadDecimalBytes() throws IOException { + public void readDecimalBytes() throws IOException { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema bytesSchema = Schema.create(Schema.Type.BYTES); Schema decimalSchema = decimal.addToSchema(Schema.create(Schema.Type.BYTES)); @@ -209,12 +245,11 @@ public void testReadDecimalBytes() throws IOException { ByteBuffer d2bytes = conversion.toBytes(d2, bytesSchema, decimal); File test = write(bytesSchema, d1bytes, d2bytes); - Assert.assertEquals("Should convert bytes to BigDecimals", expected, - read(GENERIC.createDatumReader(decimalSchema), test)); + assertEquals(expected, read(GENERIC.createDatumReader(decimalSchema), test), "Should convert bytes to BigDecimals"); } @Test - public void testWriteDecimalBytes() throws IOException { + public void writeDecimalBytes() throws IOException { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema bytesSchema = Schema.create(Schema.Type.BYTES); Schema decimalSchema = decimal.addToSchema(Schema.create(Schema.Type.BYTES)); @@ -230,8 +265,8 @@ public void testWriteDecimalBytes() throws IOException { List expected = Arrays.asList(d1bytes, d2bytes); File test = write(GENERIC, decimalSchema, d1bytes, d2bytes); - Assert.assertEquals("Should read BigDecimals as bytes", expected, - read(GenericData.get().createDatumReader(bytesSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(bytesSchema), test), + "Should read BigDecimals as bytes"); } private List read(DatumReader reader, File file) throws IOException { @@ -246,13 +281,14 @@ private List read(DatumReader reader, File file) throws IOException { return data; } - private File write(Schema schema, D... data) throws IOException { + @SafeVarargs + private final File write(Schema schema, D... data) throws IOException { return write(GenericData.get(), schema, data); } @SuppressWarnings("unchecked") private File write(GenericData model, Schema schema, D... data) throws IOException { - File file = temp.newFile(); + File file = new File(temp, "out.avro"); DatumWriter writer = model.createDatumWriter(schema); try (DataFileWriter fileWriter = new DataFileWriter<>(writer)) { @@ -266,12 +302,12 @@ private File write(GenericData model, Schema schema, D... data) throws IOExc } @Test - public void testCopyUuid() { + public void copyUuid() { testCopy(LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING)), UUID.randomUUID(), GENERIC); } @Test - public void testCopyUuidRaw() { + public void copyUuidRaw() { testCopy(LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING)), UUID.randomUUID().toString(), // use // raw // type @@ -279,13 +315,13 @@ public void testCopyUuidRaw() { } @Test - public void testCopyDecimal() { + public void copyDecimal() { testCopy(LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES)), new BigDecimal("-34.34"), GENERIC); } @Test - public void testCopyDecimalRaw() { + public void copyDecimalRaw() { testCopy(LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES)), ByteBuffer.wrap(new BigDecimal("-34.34").unscaledValue().toByteArray()), GenericData.get()); // no conversions } @@ -307,28 +343,28 @@ private void testCopy(Schema schema, Object value, GenericData model) { // test nested in array Schema arraySchema = Schema.createArray(schema); - ArrayList array = new ArrayList(Collections.singletonList(value)); + ArrayList array = new ArrayList<>(Collections.singletonList(value)); checkCopy(array, model.deepCopy(arraySchema, array), true); // test record nested in array Schema recordArraySchema = Schema.createArray(recordSchema); - ArrayList recordArray = new ArrayList(Collections.singletonList(record)); + ArrayList recordArray = new ArrayList<>(Collections.singletonList(record)); checkCopy(recordArray, model.deepCopy(recordArraySchema, recordArray), true); } private void checkCopy(Object original, Object copy, boolean notSame) { if (notSame) - Assert.assertNotSame(original, copy); - Assert.assertEquals(original, copy); + assertNotSame(original, copy); + assertEquals(original, copy); } @Test - public void testReadLocalTimestampMillis() throws IOException { + public void readLocalTimestampMillis() throws IOException { LogicalType timestamp = LogicalTypes.localTimestampMillis(); Schema longSchema = Schema.create(Schema.Type.LONG); Schema timestampSchema = timestamp.addToSchema(Schema.create(Schema.Type.LONG)); - LocalDateTime i1 = LocalDateTime.of(1986, 06, 26, 12, 07, 11, 42000000); + LocalDateTime i1 = LocalDateTime.of(1986, 6, 26, 12, 7, 11, 42000000); LocalDateTime i2 = LocalDateTime.ofInstant(Instant.ofEpochMilli(0), ZoneOffset.UTC); List expected = Arrays.asList(i1, i2); @@ -339,17 +375,17 @@ public void testReadLocalTimestampMillis() throws IOException { Long i2long = 0L; File test = write(longSchema, i1long, i2long); - Assert.assertEquals("Should convert long to LocalDateTime", expected, - read(GENERIC.createDatumReader(timestampSchema), test)); + assertEquals(expected, read(GENERIC.createDatumReader(timestampSchema), test), + "Should convert long to LocalDateTime"); } @Test - public void testWriteLocalTimestampMillis() throws IOException { + public void writeLocalTimestampMillis() throws IOException { LogicalType timestamp = LogicalTypes.localTimestampMillis(); Schema longSchema = Schema.create(Schema.Type.LONG); Schema timestampSchema = timestamp.addToSchema(Schema.create(Schema.Type.LONG)); - LocalDateTime i1 = LocalDateTime.of(1986, 06, 26, 12, 07, 11, 42000000); + LocalDateTime i1 = LocalDateTime.of(1986, 6, 26, 12, 7, 11, 42000000); LocalDateTime i2 = LocalDateTime.ofInstant(Instant.ofEpochMilli(0), ZoneOffset.UTC); Conversion conversion = new TimeConversions.LocalTimestampMillisConversion(); @@ -359,17 +395,17 @@ public void testWriteLocalTimestampMillis() throws IOException { List expected = Arrays.asList(d1long, d2long); File test = write(GENERIC, timestampSchema, i1, i2); - Assert.assertEquals("Should read LocalDateTime as longs", expected, - read(GenericData.get().createDatumReader(timestampSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(timestampSchema), test), + "Should read LocalDateTime as longs"); } @Test - public void testReadLocalTimestampMicros() throws IOException { + public void readLocalTimestampMicros() throws IOException { LogicalType timestamp = LogicalTypes.localTimestampMicros(); Schema longSchema = Schema.create(Schema.Type.LONG); Schema timestampSchema = timestamp.addToSchema(Schema.create(Schema.Type.LONG)); - LocalDateTime i1 = LocalDateTime.of(1986, 06, 26, 12, 07, 11, 420000); + LocalDateTime i1 = LocalDateTime.of(1986, 6, 26, 12, 7, 11, 420000); LocalDateTime i2 = LocalDateTime.ofInstant(Instant.ofEpochSecond(0, 4000), ZoneOffset.UTC); List expected = Arrays.asList(i1, i2); @@ -380,17 +416,17 @@ public void testReadLocalTimestampMicros() throws IOException { Long i2long = conversion.toLong(i2, longSchema, timestamp); File test = write(longSchema, i1long, i2long); - Assert.assertEquals("Should convert long to LocalDateTime", expected, - read(GENERIC.createDatumReader(timestampSchema), test)); + assertEquals(expected, read(GENERIC.createDatumReader(timestampSchema), test), + "Should convert long to LocalDateTime"); } @Test - public void testWriteLocalTimestampMicros() throws IOException { + public void writeLocalTimestampMicros() throws IOException { LogicalType timestamp = LogicalTypes.localTimestampMicros(); Schema longSchema = Schema.create(Schema.Type.LONG); Schema timestampSchema = timestamp.addToSchema(Schema.create(Schema.Type.LONG)); - LocalDateTime i1 = LocalDateTime.of(1986, 06, 26, 12, 07, 11, 420000); + LocalDateTime i1 = LocalDateTime.of(1986, 6, 26, 12, 7, 11, 420000); LocalDateTime i2 = LocalDateTime.ofInstant(Instant.ofEpochSecond(0, 4000), ZoneOffset.UTC); Conversion conversion = new TimeConversions.LocalTimestampMicrosConversion(); @@ -400,7 +436,56 @@ public void testWriteLocalTimestampMicros() throws IOException { List expected = Arrays.asList(d1long, d2long); File test = write(GENERIC, timestampSchema, i1, i2); - Assert.assertEquals("Should read LocalDateTime as longs", expected, - read(GenericData.get().createDatumReader(timestampSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(timestampSchema), test), + "Should read LocalDateTime as longs"); + } + + @Test + public void testReadAutomaticallyRegisteredUri() throws IOException { + Schema stringSchema = Schema.create(Schema.Type.STRING); + GenericData.setStringType(stringSchema, GenericData.StringType.String); + LogicalType customType = LogicalTypes.getCustomRegisteredTypes().get("custom").fromSchema(stringSchema); + Schema customTypeSchema = customType.addToSchema(Schema.create(Schema.Type.STRING)); + + CustomType ct1 = new CustomType("foo"); + CustomType ct2 = new CustomType("bar"); + List expected = Arrays.asList(ct1, ct2); + + Conversion conversion = GENERIC.getConversionFor(customType); + + // use the conversion directly instead of relying on the write side + CharSequence ct1String = conversion.toCharSequence(ct1, stringSchema, customType); + CharSequence ct2String = conversion.toCharSequence(ct2, stringSchema, customType); + + File test = write(stringSchema, ct1String, ct2String); + assertEquals(expected, read(GENERIC.createDatumReader(customTypeSchema), test), + "Should convert string to CustomType"); + } + + @Test + public void testWriteAutomaticallyRegisteredUri() throws IOException { + Schema stringSchema = Schema.create(Schema.Type.STRING); + GenericData.setStringType(stringSchema, GenericData.StringType.String); + LogicalType customType = LogicalTypes.getCustomRegisteredTypes().get("custom").fromSchema(stringSchema); + Schema customTypeSchema = customType.addToSchema(Schema.create(Schema.Type.STRING)); + + CustomType ct1 = new CustomType("foo"); + CustomType ct2 = new CustomType("bar"); + + Conversion conversion = GENERIC.getConversionFor(customType); + + // use the conversion directly instead of relying on the write side + CharSequence ct1String = conversion.toCharSequence(ct1, stringSchema, customType); + CharSequence ct2String = conversion.toCharSequence(ct2, stringSchema, customType); + List expected = Arrays.asList(ct1String, ct2String); + + File test = write(GENERIC, customTypeSchema, ct1, ct2); + + // Note that this test still cannot read strings using the logical type + // schema, as all GenericData instances have the logical type and the + // conversions loaded. That's why this final assert is slightly different. + + assertEquals(expected, read(GenericData.get().createDatumReader(stringSchema), test), + "Should read CustomType as strings"); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericRecordBuilder.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericRecordBuilder.java index 5fa321a3b27..d4eece27bd8 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericRecordBuilder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericRecordBuilder.java @@ -17,6 +17,8 @@ */ package org.apache.avro.generic; +import static org.junit.jupiter.api.Assertions.*; + import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -27,67 +29,70 @@ import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; import org.apache.avro.generic.GenericData.Record; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Unit test for the GenericRecordBuilder class. */ public class TestGenericRecordBuilder { @Test - public void testGenericBuilder() { + void genericBuilder() { Schema schema = recordSchema(); GenericRecordBuilder builder = new GenericRecordBuilder(schema); // Verify that builder has no fields set after initialization: for (Field field : schema.getFields()) { - Assert.assertFalse("RecordBuilder should not have field " + field.name(), builder.has(field.name())); - Assert.assertNull("Field " + field.name() + " should be null", builder.get(field.name())); + assertFalse(builder.has(field.name()), "RecordBuilder should not have field " + field.name()); + assertNull(builder.get(field.name()), "Field " + field.name() + " should be null"); } // Set field in builder: builder.set("intField", 1); List anArray = Arrays.asList("one", "two", "three"); builder.set("anArray", anArray); - Assert.assertTrue("anArray should be set", builder.has("anArray")); - Assert.assertEquals(anArray, builder.get("anArray")); - Assert.assertFalse("id should not be set", builder.has("id")); - Assert.assertNull(builder.get("id")); + assertTrue(builder.has("anArray"), "anArray should be set"); + assertEquals(anArray, builder.get("anArray")); + assertFalse(builder.has("id"), "id should not be set"); + assertNull(builder.get("id")); // Build the record, and verify that fields are set: Record record = builder.build(); - Assert.assertEquals(1, record.get("intField")); - Assert.assertEquals(anArray, record.get("anArray")); - Assert.assertNotNull(record.get("id")); - Assert.assertEquals("0", record.get("id").toString()); + assertEquals(1, record.get("intField")); + assertEquals(anArray, record.get("anArray")); + assertNotNull(record.get("id")); + assertEquals("0", record.get("id").toString()); // Test copy constructors: - Assert.assertEquals(builder, new GenericRecordBuilder(builder)); - Assert.assertEquals(record, new GenericRecordBuilder(record).build()); + assertEquals(builder, new GenericRecordBuilder(builder)); + assertEquals(record, new GenericRecordBuilder(record).build()); // Test clear: builder.clear("intField"); - Assert.assertFalse(builder.has("intField")); - Assert.assertNull(builder.get("intField")); + assertFalse(builder.has("intField")); + assertNull(builder.get("intField")); } - @Test(expected = org.apache.avro.AvroRuntimeException.class) - public void attemptToSetNonNullableFieldToNull() { - new GenericRecordBuilder(recordSchema()).set("intField", null); + @Test + void attemptToSetNonNullableFieldToNull() { + assertThrows(org.apache.avro.AvroRuntimeException.class, () -> { + new GenericRecordBuilder(recordSchema()).set("intField", null); + }); } - @Test(expected = org.apache.avro.AvroRuntimeException.class) - public void buildWithoutSettingRequiredFields1() { - new GenericRecordBuilder(recordSchema()).build(); + @Test + void buildWithoutSettingRequiredFields1() { + assertThrows(org.apache.avro.AvroRuntimeException.class, () -> { + new GenericRecordBuilder(recordSchema()).build(); + }); } - @Test() - public void buildWithoutSettingRequiredFields2() { + @Test + void buildWithoutSettingRequiredFields2() { try { new GenericRecordBuilder(recordSchema()).set("anArray", Collections.singletonList("one")).build(); - Assert.fail("Should have thrown " + AvroRuntimeException.class.getCanonicalName()); + fail("Should have thrown " + AvroRuntimeException.class.getCanonicalName()); } catch (AvroRuntimeException e) { - Assert.assertTrue(e.getMessage().contains("intField")); + assertTrue(e.getMessage().contains("intField")); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestSkipEnumSchema.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestSkipEnumSchema.java index b05c7b8552d..aae1af73860 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestSkipEnumSchema.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestSkipEnumSchema.java @@ -24,8 +24,7 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Test; - +import org.junit.jupiter.api.Test; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -34,7 +33,7 @@ */ public class TestSkipEnumSchema { @Test - public void testSkipEnum() throws IOException { + void skipEnum() throws IOException { Schema enumSchema = SchemaBuilder.builder().enumeration("enum").symbols("en1", "en2"); EnumSymbol enumSymbol = new EnumSymbol(enumSchema, "en1"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/FastReaderBuilderJavaClassTest.java b/lang/java/avro/src/test/java/org/apache/avro/io/FastReaderBuilderJavaClassTest.java new file mode 100644 index 00000000000..8a23133366c --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/io/FastReaderBuilderJavaClassTest.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.io; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.junit.jupiter.api.Test; + +/** + * Tests for FastReaderBuilder behavior with schemas containing "java-class" + * attributes. + */ +public class FastReaderBuilderJavaClassTest { + + /** + * Tests that GenericDatumReader can deserialize records with string fields that + * have a "java-class" attribute (e.g., BigDecimal). + * + * This test reproduces a bug where + * FastReaderBuilder.getTransformingStringReader() casts the result of + * stringReader.read() directly to String, but in GenericData mode the reader + * returns Utf8, causing a ClassCastException. + */ + @Test + void genericDatumReaderWithJavaClassAttribute() throws IOException { + // Schema with a string field that has "java-class": "java.math.BigDecimal" + // This is a common pattern for representing decimal values as strings + String schemaJson = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"TestRecord\",\n" + " \"fields\": [\n" + + " {\"name\": \"id\", \"type\": \"string\"},\n" + " {\"name\": \"price\", \"type\": [\"null\", {\n" + + " \"type\": \"string\",\n" + " \"java-class\": \"java.math.BigDecimal\"\n" + " }]}\n" + " ]\n" + + "}"; + + Schema schema = new Schema.Parser().parse(schemaJson); + + GenericRecord record = new GenericData.Record(schema); + record.put("id", "123"); + record.put("price", "-0.0002"); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + GenericDatumWriter writer = new GenericDatumWriter<>(schema); + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); + writer.write(record, encoder); + encoder.flush(); + + byte[] serialized = out.toByteArray(); + + // Deserialize using GenericDatumReader (which uses FastReaderBuilder by + // default) + GenericDatumReader reader = new GenericDatumReader<>(schema); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serialized, null); + + // AVRO-4225 this should not throw ClassCastException: Utf8 cannot be cast + // to String + GenericRecord result = reader.read(null, decoder); + + assertNotNull(result); + assertEquals("123", result.get("id").toString()); + assertEquals("-0.0002", result.get("price").toString()); + } + + /** + * Tests that GenericDatumReader can deserialize records with a direct string + * field (not in a union) that has a "java-class" attribute. + */ + @Test + void genericDatumReaderWithDirectJavaClassString() throws IOException { + String schemaJson = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"TestRecord\",\n" + " \"fields\": [\n" + + " {\"name\": \"amount\", \"type\": {\n" + " \"type\": \"string\",\n" + + " \"java-class\": \"java.math.BigDecimal\"\n" + " }}\n" + " ]\n" + "}"; + + Schema schema = new Schema.Parser().parse(schemaJson); + + GenericRecord record = new GenericData.Record(schema); + record.put("amount", "123.45"); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + GenericDatumWriter writer = new GenericDatumWriter<>(schema); + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); + writer.write(record, encoder); + encoder.flush(); + + byte[] serialized = out.toByteArray(); + + GenericDatumReader reader = new GenericDatumReader<>(schema); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serialized, null); + + GenericRecord result = reader.read(null, decoder); + + assertNotNull(result); + assertEquals("123.45", result.get("amount").toString()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java index edbcd0bcbe3..595f8c31280 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java @@ -18,8 +18,11 @@ package org.apache.avro.io; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; public class TestBinaryData { @@ -29,13 +32,63 @@ public class TestBinaryData { * normal 8). When skipping it, the next byte should be 10. */ @Test - public void testSkipLong() { + void skipLong() { byte[] b = new byte[10]; BinaryData.encodeLong(Long.MAX_VALUE, b, 0); final int nextIndex = BinaryData.skipLong(b, 0); - Assert.assertEquals(nextIndex, 10); + assertEquals(nextIndex, 10); + } + + @Test + void testIntLongVleEquality() { + byte[] intResult = new byte[9]; + byte[] longResult = new byte[9]; + BinaryData.encodeInt(0, intResult, 0); + BinaryData.encodeLong(0, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(42, intResult, 0); + BinaryData.encodeLong(42, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(-24, intResult, 0); + BinaryData.encodeLong(-24, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(Integer.MAX_VALUE, intResult, 0); + BinaryData.encodeLong(Integer.MAX_VALUE, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(Integer.MIN_VALUE, intResult, 0); + BinaryData.encodeLong(Integer.MIN_VALUE, longResult, 0); + assertArrayEquals(intResult, longResult); } + @Test + void testCompareBytesUnsigned() { + // Test case: byte value 0xFF (-1 as signed, 255 as unsigned) + // should be greater than 0x7F (127) + byte[] b1 = new byte[] { (byte) 0xFF }; + byte[] b2 = new byte[] { (byte) 0x7F }; + int result = BinaryData.compareBytes(b1, 0, 1, b2, 0, 1); + assertTrue(result > 0, "0xFF (255 unsigned) should be greater than 0x7F (127)"); + result = BinaryData.compareBytes(b2, 0, 1, b1, 0, 1); + assertTrue(result < 0, "0x7F (127) should be less than 0xFF (255 unsigned)"); + result = BinaryData.compareBytes(b1, 0, 1, b1, 0, 1); + assertEquals(0, result, "Equal byte arrays should return 0"); + + // Test with multiple bytes: {0x00, 0xFF} vs {0x00, 0x7F} + byte[] b3 = new byte[] { 0x00, (byte) 0xFF }; + byte[] b4 = new byte[] { 0x00, (byte) 0x7F }; + byte[] b5 = new byte[] { (byte) 0xFF, 0x00 }; + byte[] b6 = new byte[] { (byte) 0x7F, 0x00 }; + result = BinaryData.compareBytes(b3, 0, 2, b4, 0, 2); + assertTrue(result > 1, "{0x00, 0xFF} should be greater than {0x00, 0x7F}"); + result = BinaryData.compareBytes(b5, 0, 2, b6, 0, 2); + assertTrue(result > 1, "{0xFF, 0x00} should be greater than {0x7F, 0x00}"); + + // Test with negative byte values: -1 (0xFF) should be greater than -128 (0x80) + byte[] b7 = new byte[] { (byte) -1 }; + byte[] b8 = new byte[] { (byte) -128 }; + result = BinaryData.compareBytes(b7, 0, 1, b8, 0, 1); + assertTrue(result > 0, "-1 (0xFF=255 unsigned) should be greater than -128 (0x80=128 unsigned)"); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java index e4bf8f89ce3..33e6f098926 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java @@ -17,117 +17,157 @@ */ package org.apache.avro.io; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; +import org.apache.avro.SystemLimitException; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.ByteBufferInputStream; import org.apache.avro.util.ByteBufferOutputStream; import org.apache.avro.util.RandomData; import org.apache.avro.util.Utf8; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; + +import static org.apache.avro.TestSystemLimitException.*; + public class TestBinaryDecoder { // prime number buffer size so that looping tests hit the buffer edge // at different points in the loop. DecoderFactory factory = new DecoderFactory().configureDecoderBufferSize(521); - private boolean useDirect = false; + static EncoderFactory e_factory = EncoderFactory.get(); - public TestBinaryDecoder(boolean useDirect) { - this.useDirect = useDirect; + private Decoder newDecoderWithNoData(boolean useDirect) { + return newDecoder(new byte[0], useDirect); } - @Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { { true }, { false }, }); + private BinaryDecoder newDecoder(byte[] bytes, int start, int len, boolean useDirect) { + return this.newDecoder(bytes, start, len, null, useDirect); } - private Decoder newDecoderWithNoData() throws IOException { - return newDecoder(new byte[0]); + private BinaryDecoder newDecoder(byte[] bytes, int start, int len, BinaryDecoder reuse, boolean useDirect) { + if (useDirect) { + final ByteArrayInputStream input = new ByteArrayInputStream(bytes, start, len); + return factory.directBinaryDecoder(input, reuse); + } else { + return factory.binaryDecoder(bytes, start, len, reuse); + } } - private Decoder newDecoder(byte[] bytes, int start, int len) throws IOException { - return factory.binaryDecoder(bytes, start, len, null); + private BinaryDecoder newDecoder(InputStream in, boolean useDirect) { + return this.newDecoder(in, null, useDirect); + } + private BinaryDecoder newDecoder(InputStream in, BinaryDecoder reuse, boolean useDirect) { + if (useDirect) { + return factory.directBinaryDecoder(in, reuse); + } else { + return factory.binaryDecoder(in, reuse); + } } - private Decoder newDecoder(InputStream in) { + private BinaryDecoder newDecoder(byte[] bytes, BinaryDecoder reuse, boolean useDirect) { if (useDirect) { - return factory.directBinaryDecoder(in, null); + return this.factory.directBinaryDecoder(new ByteArrayInputStream(bytes), reuse); } else { - return factory.binaryDecoder(in, null); + return factory.binaryDecoder(bytes, reuse); } } - private Decoder newDecoder(byte[] bytes) throws IOException { - return factory.binaryDecoder(bytes, null); + private BinaryDecoder newDecoder(byte[] bytes, boolean useDirect) { + return this.newDecoder(bytes, null, useDirect); + } + + /** + * Create a decoder for simulating reading corrupt, unexpected or out-of-bounds + * data. + * + * @return a {@link org.apache.avro.io.BinaryDecoder that has been initialized + * on a byte array containing the sequence of encoded longs in order. + */ + private BinaryDecoder newDecoder(boolean useDirect, long... values) throws IOException { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null); + for (long v : values) + encoder.writeLong(v); + encoder.flush(); + return newDecoder(baos.toByteArray(), useDirect); + } } /** Verify EOFException throw at EOF */ - @Test(expected = EOFException.class) - public void testEOFBoolean() throws IOException { - newDecoderWithNoData().readBoolean(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofBoolean(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readBoolean()); } - @Test(expected = EOFException.class) - public void testEOFInt() throws IOException { - newDecoderWithNoData().readInt(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofInt(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testEOFLong() throws IOException { - newDecoderWithNoData().readLong(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofLong(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readLong()); } - @Test(expected = EOFException.class) - public void testEOFFloat() throws IOException { - newDecoderWithNoData().readFloat(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofFloat(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readFloat()); } - @Test(expected = EOFException.class) - public void testEOFDouble() throws IOException { - newDecoderWithNoData().readDouble(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofDouble(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readDouble()); } - @Test(expected = EOFException.class) - public void testEOFBytes() throws IOException { - newDecoderWithNoData().readBytes(null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofBytes(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readBytes(null)); } - @Test(expected = EOFException.class) - public void testEOFString() throws IOException { - newDecoderWithNoData().readString(new Utf8("a")); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofString(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readString(new Utf8("a"))); } - @Test(expected = EOFException.class) - public void testEOFFixed() throws IOException { - newDecoderWithNoData().readFixed(new byte[1]); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofFixed(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readFixed(new byte[1])); } - @Test(expected = EOFException.class) - public void testEOFEnum() throws IOException { - newDecoderWithNoData().readEnum(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofEnum(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readEnum()); } @Test - public void testReuse() throws IOException { + void reuse() throws IOException { ByteBufferOutputStream bbo1 = new ByteBufferOutputStream(); ByteBufferOutputStream bbo2 = new ByteBufferOutputStream(); byte[] b1 = new byte[] { 1, 2 }; @@ -142,20 +182,20 @@ public void testReuse() throws IOException { DirectBinaryDecoder d = new DirectBinaryDecoder(new ByteBufferInputStream(bbo1.getBufferList())); ByteBuffer bb1 = d.readBytes(null); - Assert.assertEquals(b1.length, bb1.limit() - bb1.position()); + Assertions.assertEquals(b1.length, bb1.limit() - bb1.position()); d.configure(new ByteBufferInputStream(bbo2.getBufferList())); ByteBuffer bb2 = d.readBytes(null); - Assert.assertEquals(b1.length, bb2.limit() - bb2.position()); + Assertions.assertEquals(b1.length, bb2.limit() - bb2.position()); } private static byte[] data = null; private static Schema schema = null; - private static int count = 200; - private static ArrayList records = new ArrayList<>(count); + private static final int count = 200; + private static final ArrayList records = new ArrayList<>(count); - @BeforeClass + @BeforeAll public static void generateData() throws IOException { int seed = (int) System.currentTimeMillis(); // note some tests (testSkipping) rely on this explicitly @@ -165,7 +205,7 @@ public static void generateData() throws IOException { + "{\"name\":\"floatField\", \"type\":\"float\"}," + "{\"name\":\"doubleField\", \"type\":\"double\"}," + "{\"name\":\"arrayField\", \"type\": " + "{\"type\":\"array\", \"items\":\"boolean\"}}," + "{\"name\":\"longField\", \"type\":\"long\"}]}"; - schema = new Schema.Parser().parse(jsonSchema); + schema = SchemaParser.parseSingle(jsonSchema); GenericDatumWriter writer = new GenericDatumWriter<>(); writer.setSchema(schema); ByteArrayOutputStream baos = new ByteArrayOutputStream(8192); @@ -179,8 +219,9 @@ public static void generateData() throws IOException { data = baos.toByteArray(); } - @Test - public void testDecodeFromSources() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void decodeFromSources(boolean useDirect) throws IOException { GenericDatumReader reader = new GenericDatumReader<>(); reader.setSchema(schema); @@ -188,81 +229,82 @@ public void testDecodeFromSources() throws IOException { ByteArrayInputStream is2 = new ByteArrayInputStream(data); ByteArrayInputStream is3 = new ByteArrayInputStream(data); - Decoder fromInputStream = newDecoder(is); - Decoder fromArray = newDecoder(data); + Decoder fromInputStream = newDecoder(is, useDirect); + Decoder fromArray = newDecoder(data, useDirect); byte[] data2 = new byte[data.length + 30]; Arrays.fill(data2, (byte) 0xff); System.arraycopy(data, 0, data2, 15, data.length); - Decoder fromOffsetArray = newDecoder(data2, 15, data.length); + Decoder fromOffsetArray = newDecoder(data2, 15, data.length, useDirect); - BinaryDecoder initOnInputStream = factory.binaryDecoder(new byte[50], 0, 30, null); - initOnInputStream = factory.binaryDecoder(is2, initOnInputStream); - BinaryDecoder initOnArray = factory.binaryDecoder(is3, null); - initOnArray = factory.binaryDecoder(data, 0, data.length, initOnArray); + BinaryDecoder initOnInputStream = newDecoder(new byte[50], 0, 30, useDirect); + initOnInputStream = newDecoder(is2, initOnInputStream, useDirect); + BinaryDecoder initOnArray = this.newDecoder(is3, null, useDirect); + initOnArray = this.newDecoder(data, initOnArray, useDirect); for (Object datum : records) { - Assert.assertEquals("InputStream based BinaryDecoder result does not match", datum, - reader.read(null, fromInputStream)); - Assert.assertEquals("Array based BinaryDecoder result does not match", datum, reader.read(null, fromArray)); - Assert.assertEquals("offset Array based BinaryDecoder result does not match", datum, - reader.read(null, fromOffsetArray)); - Assert.assertEquals("InputStream initialized BinaryDecoder result does not match", datum, - reader.read(null, initOnInputStream)); - Assert.assertEquals("Array initialized BinaryDecoder result does not match", datum, - reader.read(null, initOnArray)); + Assertions.assertEquals(datum, reader.read(null, fromInputStream), + "InputStream based BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, fromArray), "Array based BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, fromOffsetArray), + "offset Array based BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, initOnInputStream), + "InputStream initialized BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, initOnArray), + "Array initialized BinaryDecoder result does not match"); } } - @Test - public void testInputStreamProxy() throws IOException { - Decoder d = newDecoder(data); - if (d instanceof BinaryDecoder) { - BinaryDecoder bd = (BinaryDecoder) d; + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamProxy(boolean useDirect) throws IOException { + BinaryDecoder d = newDecoder(data, useDirect); + if (d != null) { + BinaryDecoder bd = d; InputStream test = bd.inputStream(); InputStream check = new ByteArrayInputStream(data); validateInputStreamReads(test, check); - bd = factory.binaryDecoder(data, bd); + bd = this.newDecoder(data, bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamSkips(test, check); // with input stream sources - bd = factory.binaryDecoder(new ByteArrayInputStream(data), bd); + bd = newDecoder(new ByteArrayInputStream(data), bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamReads(test, check); - bd = factory.binaryDecoder(new ByteArrayInputStream(data), bd); + bd = newDecoder(new ByteArrayInputStream(data), bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamSkips(test, check); } } - @Test - public void testInputStreamProxyDetached() throws IOException { - Decoder d = newDecoder(data); - if (d instanceof BinaryDecoder) { - BinaryDecoder bd = (BinaryDecoder) d; - InputStream test = bd.inputStream(); - InputStream check = new ByteArrayInputStream(data); - // detach input stream and decoder from old source - factory.binaryDecoder(new byte[56], null); - InputStream bad = bd.inputStream(); - InputStream check2 = new ByteArrayInputStream(data); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamProxyDetached(boolean useDirect) throws IOException { + BinaryDecoder bd = newDecoder(data, useDirect); + + InputStream test = bd.inputStream(); + InputStream check = new ByteArrayInputStream(data); + // detach input stream and decoder from old source + this.newDecoder(new byte[56], useDirect); + try (InputStream bad = bd.inputStream(); InputStream check2 = new ByteArrayInputStream(data)) { validateInputStreamReads(test, check); - Assert.assertFalse(bad.read() == check2.read()); + Assertions.assertNotEquals(bad.read(), check2.read()); } } - @Test - public void testInputStreamPartiallyUsed() throws IOException { - BinaryDecoder bd = factory.binaryDecoder(new ByteArrayInputStream(data), null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamPartiallyUsed(boolean useDirect) throws IOException { + BinaryDecoder bd = this.newDecoder(new ByteArrayInputStream(data), useDirect); InputStream test = bd.inputStream(); InputStream check = new ByteArrayInputStream(data); // triggers buffer fill if unused and tests isEnd() try { - Assert.assertFalse(bd.isEnd()); + Assertions.assertFalse(bd.isEnd()); } catch (UnsupportedOperationException e) { // this is ok if its a DirectBinaryDecoder. if (bd.getClass() != DirectBinaryDecoder.class) { @@ -280,25 +322,28 @@ private void validateInputStreamReads(InputStream test, InputStream check) throw while (true) { int t = test.read(); int c = check.read(); - Assert.assertEquals(c, t); - if (-1 == t) + Assertions.assertEquals(c, t); + if (-1 == t) { break; + } t = test.read(bt); c = check.read(bc); - Assert.assertEquals(c, t); - Assert.assertArrayEquals(bt, bc); - if (-1 == t) + Assertions.assertEquals(c, t); + Assertions.assertArrayEquals(bt, bc); + if (-1 == t) { break; + } t = test.read(bt, 1, 4); c = check.read(bc, 1, 4); - Assert.assertEquals(c, t); - Assert.assertArrayEquals(bt, bc); - if (-1 == t) + Assertions.assertEquals(c, t); + Assertions.assertArrayEquals(bt, bc); + if (-1 == t) { break; + } } - Assert.assertEquals(0, test.skip(5)); - Assert.assertEquals(0, test.available()); - Assert.assertFalse(test.getClass() != ByteArrayInputStream.class && test.markSupported()); + Assertions.assertEquals(0, test.skip(5)); + Assertions.assertEquals(0, test.available()); + Assertions.assertFalse(test.getClass() != ByteArrayInputStream.class && test.markSupported()); test.close(); } @@ -306,154 +351,345 @@ private void validateInputStreamSkips(InputStream test, InputStream check) throw while (true) { long t2 = test.skip(19); long c2 = check.skip(19); - Assert.assertEquals(c2, t2); - if (0 == t2) + Assertions.assertEquals(c2, t2); + if (0 == t2) { break; + } } - Assert.assertEquals(-1, test.read()); + Assertions.assertEquals(-1, test.read()); } - @Test - public void testBadIntEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void badIntEncoding(boolean useDirect) throws IOException { byte[] badint = new byte[5]; Arrays.fill(badint, (byte) 0xff); - Decoder bd = factory.binaryDecoder(badint, null); + Decoder bd = this.newDecoder(badint, useDirect); String message = ""; try { bd.readInt(); } catch (IOException ioe) { message = ioe.getMessage(); } - Assert.assertEquals("Invalid int encoding", message); + Assertions.assertEquals("Invalid int encoding", message); } - @Test - public void testBadLongEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void badLongEncoding(boolean useDirect) throws IOException { byte[] badint = new byte[10]; Arrays.fill(badint, (byte) 0xff); - Decoder bd = factory.binaryDecoder(badint, null); + Decoder bd = this.newDecoder(badint, useDirect); String message = ""; try { bd.readLong(); } catch (IOException ioe) { message = ioe.getMessage(); } - Assert.assertEquals("Invalid long encoding", message); + Assertions.assertEquals("Invalid long encoding", message); } - @Test - public void testNegativeStringLength() throws IOException { - byte[] bad = new byte[] { (byte) 1 }; - Decoder bd = factory.binaryDecoder(bad, null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringNegativeLength(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(AvroRuntimeException.class, this.newDecoder(useDirect, -1L)::readString); + Assertions.assertEquals(ERROR_NEGATIVE, ex.getMessage()); + } - Assert.assertThrows("Malformed data. Length is negative: -1", AvroRuntimeException.class, bd::readString); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringVmMaxSize(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1L)::readString); + Assertions.assertEquals(ERROR_VM_LIMIT_STRING, ex.getMessage()); } - @Test - public void testStringMaxArraySize() throws IOException { - byte[] bad = new byte[10]; - BinaryData.encodeLong(BinaryDecoder.MAX_ARRAY_SIZE + 1, bad, 0); - Decoder bd = factory.binaryDecoder(bad, null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(SystemLimitException.class, newDecoder(useDirect, 129)::readString); + Assertions.assertEquals("String length 129 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY); + resetLimits(); + } + } - Assert.assertThrows("Cannot read strings longer than " + BinaryDecoder.MAX_ARRAY_SIZE + " bytes", - UnsupportedOperationException.class, bd::readString); + /** + * Verify that a byte-array-backed decoder rejects a string whose varint length + * exceeds the remaining bytes, throwing {@link EOFException} before + * allocating the buffer. + */ + @Test + public void testStringLengthExceedsAvailableBytes() throws IOException { + // Encode a varint claiming 10_000_000 bytes of string data, but supply none. + // The byte-array-backed decoder knows it has only a few bytes left after + // the varint, so ensureAvailableBytes must throw EOFException. + BinaryDecoder bd = newDecoder(false, 10_000_000L); + Assertions.assertThrows(EOFException.class, () -> bd.readString(null)); + } + + /** + * Same as {@link #testStringLengthExceedsAvailableBytes()} but for + * {@link BinaryDecoder#readBytes(ByteBuffer)}. + */ + @Test + public void testBytesLengthExceedsAvailableBytes() throws IOException { + BinaryDecoder bd = newDecoder(false, 10_000_000L); + Assertions.assertThrows(EOFException.class, () -> bd.readBytes(null)); } @Test - public void testNegativeBytesLength() throws IOException { - byte[] bad = new byte[] { (byte) 1 }; - Decoder bd = factory.binaryDecoder(bad, null); + public void testStringLengthDoesNotTrustUnknownAvailable() throws IOException { + byte[] encoded; + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null); + encoder.writeString("hello"); + encoder.flush(); + encoded = baos.toByteArray(); + } + + InputStream in = new ByteArrayInputStream(encoded) { + @Override + public synchronized int available() { + return 0; + } + }; - Assert.assertThrows("Malformed data. Length is negative: -1", AvroRuntimeException.class, () -> bd.readBytes(null)); + BinaryDecoder decoder = factory.binaryDecoder(in, null); + Assertions.assertEquals("hello", decoder.readString(null).toString()); } - @Test - public void testBytesMaxArraySize() throws IOException { - byte[] bad = new byte[10]; - BinaryData.encodeLong(BinaryDecoder.MAX_ARRAY_SIZE + 1, bad, 0); - Decoder bd = factory.binaryDecoder(bad, null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesNegativeLength(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(AvroRuntimeException.class, + () -> this.newDecoder(useDirect, -1).readBytes(null)); + Assertions.assertEquals(ERROR_NEGATIVE, ex.getMessage()); + } - Assert.assertThrows("Cannot read arrays longer than " + BinaryDecoder.MAX_ARRAY_SIZE + " bytes", - UnsupportedOperationException.class, () -> bd.readBytes(null)); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesVmMaxSize(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readBytes(null)); + Assertions.assertEquals(ERROR_VM_LIMIT_BYTES, ex.getMessage()); } - @Test - public void testBytesMaxLengthProperty() throws IOException { - int maxLength = 128; - byte[] bad = new byte[10]; - BinaryData.encodeLong(maxLength + 1, bad, 0); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_BYTES_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(SystemLimitException.class, + () -> newDecoder(useDirect, 129).readBytes(null)); + Assertions.assertEquals("Bytes length 129 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_BYTES_LENGTH_PROPERTY); + resetLimits(); + } + } + + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testArrayVmMaxSize(boolean useDirect) throws IOException { + // At start + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readArrayStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Next + ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).arrayNext()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // An OK reads followed by an overflow + Decoder bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, Long.MAX_VALUE); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit. + bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, 100, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + Assertions.assertEquals(100, bd.arrayNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, 100 - MAX_ARRAY_VM_LIMIT, 999, -100, 999, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + Assertions.assertEquals(100, bd.arrayNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + } + + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testArrayMaxCustom(boolean useDirect) throws IOException { try { - System.setProperty("org.apache.avro.limits.bytes.maxLength", Long.toString(maxLength)); - Decoder bd = factory.binaryDecoder(bad, null); + System.setProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readArrayStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the custom limit. + Decoder bd = newDecoder(useDirect, 118, 10, 1); + Assertions.assertEquals(118, bd.readArrayStart()); + Assertions.assertEquals(10, bd.arrayNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::arrayNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, -118, 999, -10, 999, 1); + Assertions.assertEquals(118, bd.readArrayStart()); + Assertions.assertEquals(10, bd.arrayNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::arrayNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); - Assert.assertThrows("Bytes length " + (maxLength + 1) + " exceeds maximum allowed", AvroRuntimeException.class, - () -> bd.readBytes(null)); } finally { - System.clearProperty("org.apache.avro.limits.bytes.maxLength"); + System.clearProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY); + resetLimits(); } } - @Test(expected = UnsupportedOperationException.class) - public void testLongLengthEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testMapVmMaxSize(boolean useDirect) throws IOException { + // At start + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readMapStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Next + ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).mapNext()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit. + Decoder bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, 100, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readMapStart()); + Assertions.assertEquals(100, bd.mapNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::mapNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, 100 - MAX_ARRAY_VM_LIMIT, 999, -100, 999, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readMapStart()); + Assertions.assertEquals(100, bd.mapNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::mapNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + } + + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testMapMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readMapStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the custom limit. + Decoder bd = newDecoder(useDirect, 118, 10, 1); + Assertions.assertEquals(118, bd.readMapStart()); + Assertions.assertEquals(10, bd.mapNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::mapNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, -118, 999, -10, 999, 1); + Assertions.assertEquals(118, bd.readMapStart()); + Assertions.assertEquals(10, bd.mapNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::mapNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); + + } finally { + System.clearProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY); + resetLimits(); + } + } + + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void longLengthEncoding(boolean useDirect) { // Size equivalent to Integer.MAX_VALUE + 1 byte[] bad = new byte[] { (byte) -128, (byte) -128, (byte) -128, (byte) -128, (byte) 16 }; - Decoder bd = factory.binaryDecoder(bad, null); - bd.readString(); + Decoder bd = this.newDecoder(bad, useDirect); + Assertions.assertThrows(UnsupportedOperationException.class, bd::readString); } - @Test(expected = EOFException.class) - public void testIntTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void intTooShort(boolean useDirect) { byte[] badint = new byte[4]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readInt(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testLongTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void longTooShort(boolean useDirect) { byte[] badint = new byte[9]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readLong(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readLong()); } - @Test(expected = EOFException.class) - public void testFloatTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void floatTooShort(boolean useDirect) { byte[] badint = new byte[3]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readInt(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testDoubleTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void doubleTooShort(boolean useDirect) { byte[] badint = new byte[7]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readLong(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readLong()); } - @Test - public void testSkipping() throws IOException { - Decoder d = newDecoder(data); - skipGenerated(d); - if (d instanceof BinaryDecoder) { - BinaryDecoder bd = (BinaryDecoder) d; - try { - Assert.assertTrue(bd.isEnd()); - } catch (UnsupportedOperationException e) { - // this is ok if its a DirectBinaryDecoder. - if (bd.getClass() != DirectBinaryDecoder.class) { - throw e; - } + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void skipping(boolean useDirect) throws IOException { + BinaryDecoder bd = newDecoder(data, useDirect); + skipGenerated(bd); + + try { + Assertions.assertTrue(bd.isEnd()); + } catch (UnsupportedOperationException e) { + // this is ok if its a DirectBinaryDecoder. + if (bd.getClass() != DirectBinaryDecoder.class) { + throw e; } - bd = factory.binaryDecoder(new ByteArrayInputStream(data), bd); - skipGenerated(bd); - try { - Assert.assertTrue(bd.isEnd()); - } catch (UnsupportedOperationException e) { - // this is ok if its a DirectBinaryDecoder. - if (bd.getClass() != DirectBinaryDecoder.class) { - throw e; - } + } + bd = this.newDecoder(new ByteArrayInputStream(data), bd, useDirect); + skipGenerated(bd); + try { + Assertions.assertTrue(bd.isEnd()); + } catch (UnsupportedOperationException e) { + // this is ok if its a DirectBinaryDecoder. + if (bd.getClass() != DirectBinaryDecoder.class) { + throw e; } } + } private void skipGenerated(Decoder bd) throws IOException { @@ -468,6 +704,7 @@ private void skipGenerated(Decoder bd) throws IOException { // booleans are one byte, array trailer is one byte bd.skipFixed((int) leftover + 1); bd.skipFixed(0); + bd.skipFixed(-8); // Should be a no-op; see AVRO-3635 bd.readLong(); } EOFException eof = null; @@ -476,19 +713,20 @@ private void skipGenerated(Decoder bd) throws IOException { } catch (EOFException e) { eof = e; } - Assert.assertTrue(null != eof); + Assertions.assertNotNull(eof); } - @Test(expected = EOFException.class) - public void testEOF() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eof(boolean useDirect) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); Encoder e = EncoderFactory.get().binaryEncoder(baos, null); e.writeLong(0x10000000000000L); e.flush(); - Decoder d = newDecoder(new ByteArrayInputStream(baos.toByteArray())); - Assert.assertEquals(0x10000000000000L, d.readLong()); - d.readInt(); + Decoder d = newDecoder(new ByteArrayInputStream(baos.toByteArray()), useDirect); + Assertions.assertEquals(0x10000000000000L, d.readLong()); + Assertions.assertThrows(EOFException.class, () -> d.readInt()); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryEncoderFidelity.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryEncoderFidelity.java index f452c8b29b7..1f699ea8266 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryEncoderFidelity.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryEncoderFidelity.java @@ -17,15 +17,17 @@ */ package org.apache.avro.io; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Random; import org.apache.avro.util.Utf8; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; public class TestBinaryEncoderFidelity { @@ -138,7 +140,7 @@ static void generateComplexData(Encoder e) throws IOException { e.flush(); } - @BeforeClass + @BeforeAll public static void generateLegacyData() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); Encoder e = new LegacyBinaryEncoder(baos); @@ -150,49 +152,91 @@ public static void generateLegacyData() throws IOException { } @Test - public void testBinaryEncoder() throws IOException { + void binaryEncoder() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); BinaryEncoder e = factory.binaryEncoder(baos, null); generateData(e, true); byte[] result = baos.toByteArray(); - Assert.assertEquals(legacydata.length, result.length); - Assert.assertArrayEquals(legacydata, result); + assertEquals(legacydata.length, result.length); + assertArrayEquals(legacydata, result); baos.reset(); generateComplexData(e); byte[] result2 = baos.toByteArray(); - Assert.assertEquals(complexdata.length, result2.length); - Assert.assertArrayEquals(complexdata, result2); + assertEquals(complexdata.length, result2.length); + assertArrayEquals(complexdata, result2); } @Test - public void testDirectBinaryEncoder() throws IOException { + void directBinaryEncoder() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); BinaryEncoder e = factory.directBinaryEncoder(baos, null); generateData(e, true); byte[] result = baos.toByteArray(); - Assert.assertEquals(legacydata.length, result.length); - Assert.assertArrayEquals(legacydata, result); + assertEquals(legacydata.length, result.length); + assertArrayEquals(legacydata, result); + baos.reset(); + generateComplexData(e); + byte[] result2 = baos.toByteArray(); + assertEquals(complexdata.length, result2.length); + assertArrayEquals(complexdata, result2); + } + + @Test + void blockingDirectBinaryEncoder() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryEncoder e = factory.blockingDirectBinaryEncoder(baos, null); + generateData(e, true); + + byte[] result = baos.toByteArray(); + assertEquals(legacydata.length, result.length); + assertArrayEquals(legacydata, result); baos.reset(); + generateComplexData(e); byte[] result2 = baos.toByteArray(); - Assert.assertEquals(complexdata.length, result2.length); - Assert.assertArrayEquals(complexdata, result2); + // blocking will cause different length, should be two bytes larger + assertEquals(complexdata.length + 2, result2.length); + // the first byte is the array start, with the count of items negative + assertEquals(complexdata[0] >>> 1, result2[0]); + baos.reset(); + + e.writeArrayStart(); + e.setItemCount(1); + e.startItem(); + e.writeInt(1); + e.writeArrayEnd(); + + // 1: 1 element in the array + // 2: 1 byte for the int + // 3: zigzag encoded int + // 4: 0 elements in the next block + assertArrayEquals(baos.toByteArray(), new byte[] { 1, 2, 2, 0 }); + baos.reset(); + + e.writeArrayStart(); + e.setItemCount(0); + e.writeArrayEnd(); + + // This is correct + // 0: 0 elements in the block + assertArrayEquals(baos.toByteArray(), new byte[] { 0 }); + baos.reset(); } @Test - public void testBlockingBinaryEncoder() throws IOException { + void blockingBinaryEncoder() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); BinaryEncoder e = factory.blockingBinaryEncoder(baos, null); generateData(e, true); byte[] result = baos.toByteArray(); - Assert.assertEquals(legacydata.length, result.length); - Assert.assertArrayEquals(legacydata, result); + assertEquals(legacydata.length, result.length); + assertArrayEquals(legacydata, result); baos.reset(); generateComplexData(e); byte[] result2 = baos.toByteArray(); // blocking will cause different length, should be two bytes larger - Assert.assertEquals(complexdata.length + 2, result2.length); + assertEquals(complexdata.length + 2, result2.length); // the first byte is the array start, with the count of items negative - Assert.assertEquals(complexdata[0] >>> 1, result2[0]); + assertEquals(complexdata[0] >>> 1, result2[0]); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingDirectBinaryEncoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingDirectBinaryEncoder.java new file mode 100644 index 00000000000..caf485500f0 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingDirectBinaryEncoder.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.io; + +import org.apache.avro.Schema; +import org.apache.avro.SchemaNormalization; +import org.apache.avro.SchemaParser; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.message.BinaryMessageDecoder; +import org.apache.avro.specific.TestRecordWithMapsAndArrays; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.Map; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.hasEntry; +import static org.hamcrest.Matchers.is; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +public class TestBlockingDirectBinaryEncoder { + + private void writeToArray(BinaryEncoder encoder, int[] numbers) throws IOException { + encoder.writeArrayStart(); + encoder.setItemCount(numbers.length); + for (int number : numbers) { + encoder.startItem(); + encoder.writeString(Integer.toString(number)); + } + encoder.writeArrayEnd(); + } + + private void writeToMap(BinaryEncoder encoder, long[] numbers) throws IOException { + encoder.writeMapStart(); + encoder.setItemCount(numbers.length); + for (long number : numbers) { + encoder.startItem(); + encoder.writeString(Long.toString(number)); + encoder.writeLong(number); + } + encoder.writeMapEnd(); + } + + @Test + void blockingDirectBinaryEncoder() throws IOException, NoSuchAlgorithmException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryEncoder encoder = EncoderFactory.get().blockingDirectBinaryEncoder(baos, null); + + // This is needed because there is no BlockingDirectBinaryEncoder + // BinaryMessageWriter + // available out of the box + encoder.writeFixed(new byte[] { (byte) 0xC3, (byte) 0x01 }); + encoder.writeFixed(SchemaNormalization.parsingFingerprint("CRC-64-AVRO", TestRecordWithMapsAndArrays.SCHEMA$)); + + // Array + this.writeToArray(encoder, new int[] { 1, 2, 3, 4, 5 }); + + // Map + writeToMap(encoder, new long[] { 1L, 2L, 3L, 4L, 5L }); + + // Nested Array + + encoder.writeArrayStart(); + encoder.setItemCount(2); + this.writeToArray(encoder, new int[] { 1, 2 }); + this.writeToArray(encoder, new int[] { 3, 4, 5 }); + encoder.writeArrayEnd(); + + // Nested Map + + encoder.writeMapStart(); + encoder.setItemCount(2); + encoder.writeString("first"); + this.writeToMap(encoder, new long[] { 1L, 2L }); + encoder.writeString("second"); + this.writeToMap(encoder, new long[] { 3L, 4L, 5L }); + encoder.writeMapEnd(); + + // Read + + encoder.flush(); + + BinaryMessageDecoder decoder = TestRecordWithMapsAndArrays.getDecoder(); + TestRecordWithMapsAndArrays r = decoder.decode(baos.toByteArray()); + + assertThat(r.getArr(), is(Arrays.asList("1", "2", "3", "4", "5"))); + Map map = r.getMap(); + assertThat(map.size(), is(5)); + for (long i = 1; i <= 5; i++) { + assertThat(map.get(Long.toString(i)), is(i)); + } + + assertThat(r.getNestedArr(), is(Arrays.asList(Arrays.asList("1", "2"), Arrays.asList("3", "4", "5")))); + + Map> nestedMap = r.getNestedMap(); + assertThat(nestedMap.size(), is(2)); + + assertThat(nestedMap.get("first").size(), is(2)); + assertThat(nestedMap.get("first").get("1"), is(1L)); + assertThat(nestedMap.get("first").get("2"), is(2L)); + + assertThat(nestedMap.get("second").size(), is(3)); + assertThat(nestedMap.get("second").get("3"), is(3L)); + assertThat(nestedMap.get("second").get("4"), is(4L)); + assertThat(nestedMap.get("second").get("5"), is(5L)); + } + + @Test + void testSkippingUsingBlocks() throws IOException, NoSuchAlgorithmException { + // Create an empty schema for read, so we skip over all the fields + Schema emptySchema = SchemaParser.parseSingle( + "{\"type\":\"record\",\"name\":\"TestRecordWithMapsAndArrays\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[]}"); + + GenericDatumReader in = new GenericDatumReader<>(TestRecordWithMapsAndArrays.SCHEMA$, emptySchema); + Decoder mockDecoder = mock(BinaryDecoder.class); + + for (long i = 0; i < 1; i++) { + in.read(null, mockDecoder); + } + + verify(mockDecoder, times(2)).skipMap(); + verify(mockDecoder, times(2)).skipArray(); + verify(mockDecoder, times(0)).readString(); + verify(mockDecoder, times(0)).readLong(); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java index 6beda2ae66e..d107b9d82d7 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java @@ -17,7 +17,12 @@ */ package org.apache.avro.io; -import static org.junit.Assert.*; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -25,28 +30,14 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.ArrayDeque; -import java.util.Arrays; -import java.util.Collection; +import java.util.stream.Stream; -import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.core.JsonParser; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; -@RunWith(Parameterized.class) public class TestBlockingIO { - private final int iSize; - private final int iDepth; - private final String sInput; - - public TestBlockingIO(int sz, int dp, String inp) { - this.iSize = sz; - this.iDepth = dp; - this.sInput = inp; - } - private static class Tests { private final JsonParser parser; private final Decoder input; @@ -206,25 +197,29 @@ public S(long count, boolean isArray) { } } - @Test - public void testScan() throws IOException { - Tests t = new Tests(iSize, iDepth, sInput); + @ParameterizedTest + @MethodSource("data") + public void testScan(int size, int depth, String input) throws IOException { + Tests t = new Tests(size, depth, input); t.scan(); } - @Test - public void testSkip1() throws IOException { - testSkip(iSize, iDepth, sInput, 0); + @ParameterizedTest + @MethodSource("data") + public void testSkip1(int size, int depth, String input) throws IOException { + testSkip(size, depth, input, 0); } - @Test - public void testSkip2() throws IOException { - testSkip(iSize, iDepth, sInput, 1); + @ParameterizedTest + @MethodSource("data") + public void testSkip2(int size, int depth, String input) throws IOException { + testSkip(size, depth, input, 1); } - @Test - public void testSkip3() throws IOException { - testSkip(iSize, iDepth, sInput, 2); + @ParameterizedTest + @MethodSource("data") + public void testSkip3(int size, int depth, String input) throws IOException { + testSkip(size, depth, input, 2); } private void testSkip(int bufferSize, int depth, String input, int skipLevel) throws IOException { @@ -323,9 +318,8 @@ private static void serialize(Encoder cos, JsonParser p, ByteArrayOutputStream o } } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { { 64, 0, "" }, { 64, 0, jss(0, 'a') }, { 64, 0, jss(3, 'a') }, + public static Stream data() { + return Stream.of(new Object[][] { { 64, 0, "" }, { 64, 0, jss(0, 'a') }, { 64, 0, jss(3, 'a') }, { 64, 0, jss(64, 'a') }, { 64, 0, jss(65, 'a') }, { 64, 0, jss(100, 'a') }, { 64, 1, "[]" }, { 64, 1, "[" + jss(0, 'a') + "]" }, { 64, 1, "[" + jss(3, 'a') + "]" }, { 64, 1, "[" + jss(61, 'a') + "]" }, { 64, 1, "[" + jss(62, 'a') + "]" }, { 64, 1, "[" + jss(64, 'a') + "]" }, { 64, 1, "[" + jss(65, 'a') + "]" }, @@ -387,7 +381,8 @@ public static Collection data() { { 100, 2, "[[\"pqr\", \"ab\", \"mnopqrstuvwx\"]]" }, { 64, 2, "[[[\"pqr\"]], [[\"ab\"], [\"mnopqrstuvwx\"]]]" }, { 64, 1, "{}" }, { 64, 1, "{\"n\": \"v\"}" }, { 64, 1, "{\"n1\": \"v\", \"n2\": []}" }, - { 100, 1, "{\"n1\": \"v\", \"n2\": []}" }, { 100, 1, "{\"n1\": \"v\", \"n2\": [\"abc\"]}" }, }); + { 100, 1, "{\"n1\": \"v\", \"n2\": []}" }, { 100, 1, "{\"n1\": \"v\", \"n2\": [\"abc\"]}" }, }) + .map(Arguments::of); } /** diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java index 3a91bb96dea..378e17ee613 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java @@ -17,14 +17,13 @@ */ package org.apache.avro.io; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; - -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import java.util.stream.Stream; /** * This class has more exhaustive tests for Blocking IO. The reason we have both @@ -32,38 +31,29 @@ * TestBlockingIO2, it is hard to test skip() operations. and with the test * infrastructure of TestBlockingIO, it is hard to test enums, unions etc. */ -@RunWith(Parameterized.class) public class TestBlockingIO2 { - private final Decoder decoder; - private final String calls; - private Object[] values; - private String msg; - - public TestBlockingIO2(int bufferSize, int skipLevel, String calls) throws IOException { + @ParameterizedTest + @MethodSource("data") + public void testScan(int bufferSize, int skipLevel, String calls) throws IOException { ByteArrayOutputStream os = new ByteArrayOutputStream(); EncoderFactory factory = new EncoderFactory().configureBlockSize(bufferSize); Encoder encoder = factory.blockingBinaryEncoder(os, null); - this.values = TestValidatingIO.randomValues(calls); + Object[] values = TestValidatingIO.randomValues(calls); TestValidatingIO.generate(encoder, calls, values); encoder.flush(); byte[] bb = os.toByteArray(); - decoder = DecoderFactory.get().binaryDecoder(bb, null); - this.calls = calls; - this.msg = "Case: { " + bufferSize + ", " + skipLevel + ", \"" + calls + "\" }"; - } + Decoder decoder = DecoderFactory.get().binaryDecoder(bb, null); + String msg = "Case: { " + bufferSize + ", " + skipLevel + ", \"" + calls + "\" }"; - @Test - public void testScan() throws IOException { TestValidatingIO.check(msg, decoder, calls, values, -1); } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { { 64, 0, "" }, { 64, 0, "S0" }, { 64, 0, "S3" }, { 64, 0, "S64" }, + public static Stream data() { + return Stream.of(new Object[][] { { 64, 0, "" }, { 64, 0, "S0" }, { 64, 0, "S3" }, { 64, 0, "S64" }, { 64, 0, "S65" }, { 64, 0, "S100" }, { 64, 1, "[]" }, { 64, 1, "[c1sS0]" }, { 64, 1, "[c1sS3]" }, { 64, 1, "[c1sS61]" }, { 64, 1, "[c1sS62]" }, { 64, 1, "[c1sS64]" }, { 64, 1, "[c1sS65]" }, { 64, 1, "[c2sS0sS0]" }, { 64, 1, "[c2sS0sS10]" }, { 64, 1, "[c2sS0sS63]" }, { 64, 1, "[c2sS0sS64]" }, @@ -99,6 +89,6 @@ public static Collection data() { { 100, 1, "{c1sK5e10}" }, { 100, 1, "{c1sK5U1S10}" }, { 100, 1, "{c1sK5f10S10}" }, { 100, 1, "{c1sK5NS10}" }, { 100, 1, "{c1sK5BS10}" }, { 100, 1, "{c1sK5IS10}" }, { 100, 1, "{c1sK5LS10}" }, { 100, 1, "{c1sK5FS10}" }, - { 100, 1, "{c1sK5DS10}" }, }); + { 100, 1, "{c1sK5DS10}" }, }).map(Arguments::of); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java index f3a0760d82e..df4ca4aeb02 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java @@ -25,14 +25,15 @@ import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; import org.apache.avro.Schema.Type; +import org.apache.avro.SchemaParser; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -47,150 +48,177 @@ import static java.util.Arrays.asList; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; -import static org.junit.Assert.assertThat; +import static org.junit.jupiter.api.Assertions.*; +import static org.hamcrest.MatcherAssert.assertThat; public class TestEncoders { private static final int ENCODER_BUFFER_SIZE = 32; private static final int EXAMPLE_DATA_SIZE = 17; - private static EncoderFactory factory = EncoderFactory.get(); + private static final EncoderFactory FACTORY = EncoderFactory.get(); - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); + @TempDir + public Path dataDir; @Test - public void testBinaryEncoderInit() throws IOException { + void binaryEncoderInit() throws IOException { OutputStream out = new ByteArrayOutputStream(); - BinaryEncoder enc = factory.binaryEncoder(out, null); - Assert.assertSame(enc, factory.binaryEncoder(out, enc)); + BinaryEncoder enc = FACTORY.binaryEncoder(out, null); + assertSame(enc, FACTORY.binaryEncoder(out, enc)); } - @Test(expected = NullPointerException.class) - public void testBadBinaryEncoderInit() { - factory.binaryEncoder(null, null); + @Test + void badBinaryEncoderInit() { + assertThrows(NullPointerException.class, () -> { + FACTORY.binaryEncoder(null, null); + }); } @Test - public void testBlockingBinaryEncoderInit() throws IOException { + void blockingBinaryEncoderInit() throws IOException { OutputStream out = new ByteArrayOutputStream(); BinaryEncoder reuse = null; - reuse = factory.blockingBinaryEncoder(out, reuse); - Assert.assertSame(reuse, factory.blockingBinaryEncoder(out, reuse)); + reuse = FACTORY.blockingBinaryEncoder(out, reuse); + assertSame(reuse, FACTORY.blockingBinaryEncoder(out, reuse)); // comparison } - @Test(expected = NullPointerException.class) - public void testBadBlockintBinaryEncoderInit() { - factory.binaryEncoder(null, null); + @Test + void badBlockintBinaryEncoderInit() { + assertThrows(NullPointerException.class, () -> { + FACTORY.binaryEncoder(null, null); + }); + } + + @Test + void directBinaryEncoderInit() throws IOException { + OutputStream out = new ByteArrayOutputStream(); + BinaryEncoder enc = FACTORY.directBinaryEncoder(out, null); + assertSame(enc, FACTORY.directBinaryEncoder(out, enc)); + } + + @Test + void badDirectBinaryEncoderInit() { + assertThrows(NullPointerException.class, () -> { + FACTORY.directBinaryEncoder(null, null); + }); } @Test - public void testDirectBinaryEncoderInit() throws IOException { + void blockingDirectBinaryEncoderInit() throws IOException { OutputStream out = new ByteArrayOutputStream(); - BinaryEncoder enc = factory.directBinaryEncoder(out, null); - Assert.assertSame(enc, factory.directBinaryEncoder(out, enc)); + BinaryEncoder enc = FACTORY.blockingDirectBinaryEncoder(out, null); + assertSame(enc, FACTORY.blockingDirectBinaryEncoder(out, enc)); } - @Test(expected = NullPointerException.class) - public void testBadDirectBinaryEncoderInit() { - factory.directBinaryEncoder(null, null); + @Test + void badBlockingDirectBinaryEncoderInit() { + assertThrows(NullPointerException.class, () -> { + FACTORY.blockingDirectBinaryEncoder(null, null); + }); } @Test - public void testJsonEncoderInit() throws IOException { - Schema s = new Schema.Parser().parse("\"int\""); + void jsonEncoderInit() throws IOException { + Schema s = Schema.create(Type.INT); OutputStream out = new ByteArrayOutputStream(); - factory.jsonEncoder(s, out); - JsonEncoder enc = factory.jsonEncoder(s, new JsonFactory().createGenerator(out, JsonEncoding.UTF8)); + FACTORY.jsonEncoder(s, out); + JsonEncoder enc = FACTORY.jsonEncoder(s, new JsonFactory().createGenerator(out, JsonEncoding.UTF8)); enc.configure(out); } - @Test(expected = NullPointerException.class) - public void testBadJsonEncoderInitOS() throws IOException { - factory.jsonEncoder(Schema.create(Type.INT), (OutputStream) null); + @Test + void badJsonEncoderInitOS() throws IOException { + assertThrows(NullPointerException.class, () -> { + FACTORY.jsonEncoder(Schema.create(Type.INT), (OutputStream) null); + }); } - @Test(expected = NullPointerException.class) - public void testBadJsonEncoderInit() throws IOException { - factory.jsonEncoder(Schema.create(Type.INT), (JsonGenerator) null); + @Test + void badJsonEncoderInit() throws IOException { + assertThrows(NullPointerException.class, () -> { + FACTORY.jsonEncoder(Schema.create(Type.INT), (JsonGenerator) null); + }); } @Test - public void testJsonEncoderNewlineDelimited() throws IOException { + void jsonEncoderNewlineDelimited() throws IOException { OutputStream out = new ByteArrayOutputStream(); Schema ints = Schema.create(Type.INT); - Encoder e = factory.jsonEncoder(ints, out); + Encoder e = FACTORY.jsonEncoder(ints, out); String separator = System.getProperty("line.separator"); GenericDatumWriter writer = new GenericDatumWriter<>(ints); writer.write(1, e); writer.write(2, e); e.flush(); - Assert.assertEquals("1" + separator + "2", out.toString()); + assertEquals("1" + separator + "2", out.toString()); } @Test - public void testJsonEncoderWhenIncludeNamespaceOptionIsFalse() throws IOException { + void jsonEncoderWhenIncludeNamespaceOptionIsFalse() throws IOException { String value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}"; String schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + "]}"; - Schema schema = new Schema.Parser().parse(schemaStr); + Schema schema = SchemaParser.parseSingle(schemaStr); byte[] avroBytes = fromJsonToAvro(value, schema); ObjectMapper mapper = new ObjectMapper(); - Assert.assertEquals(mapper.readTree("{\"b\":\"myVal\",\"a\":1}"), + assertEquals(mapper.readTree("{\"b\":\"myVal\",\"a\":1}"), mapper.readTree(fromAvroToJson(avroBytes, schema, false))); } @Test - public void testJsonEncoderWhenIncludeNamespaceOptionIsTrue() throws IOException { + void jsonEncoderWhenIncludeNamespaceOptionIsTrue() throws IOException { String value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}"; String schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + "]}"; - Schema schema = new Schema.Parser().parse(schemaStr); + Schema schema = SchemaParser.parseSingle(schemaStr); byte[] avroBytes = fromJsonToAvro(value, schema); ObjectMapper mapper = new ObjectMapper(); - Assert.assertEquals(mapper.readTree("{\"b\":{\"string\":\"myVal\"},\"a\":1}"), + assertEquals(mapper.readTree("{\"b\":{\"string\":\"myVal\"},\"a\":1}"), mapper.readTree(fromAvroToJson(avroBytes, schema, true))); } @Test - public void testValidatingEncoderInit() throws IOException { - Schema s = new Schema.Parser().parse("\"int\""); + void validatingEncoderInit() throws IOException { + Schema s = Schema.create(Type.INT); OutputStream out = new ByteArrayOutputStream(); - Encoder e = factory.directBinaryEncoder(out, null); - factory.validatingEncoder(s, e).configure(e); + Encoder e = FACTORY.directBinaryEncoder(out, null); + FACTORY.validatingEncoder(s, e).configure(e); } @Test - public void testJsonRecordOrdering() throws IOException { + void jsonRecordOrdering() throws IOException { String value = "{\"b\": 2, \"a\": 1}"; - Schema schema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + Schema schema = SchemaParser.parseSingle("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": \"int\"}" + "]}"); GenericDatumReader reader = new GenericDatumReader<>(schema); Decoder decoder = DecoderFactory.get().jsonDecoder(schema, value); Object o = reader.read(null, decoder); - Assert.assertEquals("{\"a\": 1, \"b\": 2}", o.toString()); + assertEquals("{\"a\": 1, \"b\": 2}", o.toString()); } - @Test(expected = AvroTypeException.class) - public void testJsonExcessFields() throws IOException { - String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a0\": 45, \"a2\":true, \"a1\": null}}"; - Schema schema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" - + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" - + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" - + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" - + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" - + "]}"); - GenericDatumReader reader = new GenericDatumReader<>(schema); - Decoder decoder = DecoderFactory.get().jsonDecoder(schema, value); - reader.read(null, decoder); + @Test + void jsonExcessFields() throws IOException { + assertThrows(AvroTypeException.class, () -> { + String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a0\": 45, \"a2\":true, \"a1\": null}}"; + Schema schema = SchemaParser.parseSingle("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + GenericDatumReader reader = new GenericDatumReader<>(schema); + Decoder decoder = DecoderFactory.get().jsonDecoder(schema, value); + reader.read(null, decoder); + }); } @Test - public void testJsonRecordOrdering2() throws IOException { + void jsonRecordOrdering2() throws IOException { String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; - Schema schema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + Schema schema = SchemaParser.parseSingle("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" @@ -199,56 +227,56 @@ public void testJsonRecordOrdering2() throws IOException { GenericDatumReader reader = new GenericDatumReader<>(schema); Decoder decoder = DecoderFactory.get().jsonDecoder(schema, value); Object o = reader.read(null, decoder); - Assert.assertEquals("{\"a\": {\"a1\": null, \"a2\": true}, \"b\": {\"b1\": \"h\", \"b2\": 3.14, \"b3\": 1.4}}", + assertEquals("{\"a\": {\"a1\": null, \"a2\": true}, \"b\": {\"b1\": \"h\", \"b2\": 3.14, \"b3\": 1.4}}", o.toString()); } @Test - public void testJsonRecordOrderingWithProjection() throws IOException { + void jsonRecordOrderingWithProjection() throws IOException { String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; - Schema writerSchema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + Schema writerSchema = SchemaParser.parseSingle("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + "]}"); - Schema readerSchema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + Schema readerSchema = SchemaParser.parseSingle("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" + "]}"); GenericDatumReader reader = new GenericDatumReader<>(writerSchema, readerSchema); Decoder decoder = DecoderFactory.get().jsonDecoder(writerSchema, value); Object o = reader.read(null, decoder); - Assert.assertEquals("{\"a\": {\"a1\": null, \"a2\": true}}", o.toString()); + assertEquals("{\"a\": {\"a1\": null, \"a2\": true}}", o.toString()); } @Test - public void testJsonRecordOrderingWithProjection2() throws IOException { + void jsonRecordOrderingWithProjection2() throws IOException { String value = "{\"b\": { \"b1\": \"h\", \"b2\": [3.14, 3.56], \"b3\": 1.4}, \"a\": {\"a2\":true, \"a1\": null}}"; - Schema writerSchema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + Schema writerSchema = SchemaParser.parseSingle("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":{\"type\":\"array\", \"items\":\"float\"}}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + "]}"); - Schema readerSchema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + Schema readerSchema = SchemaParser.parseSingle("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" + "]}"); GenericDatumReader reader = new GenericDatumReader<>(writerSchema, readerSchema); Decoder decoder = DecoderFactory.get().jsonDecoder(writerSchema, value); Object o = reader.read(null, decoder); - Assert.assertEquals("{\"a\": {\"a1\": null, \"a2\": true}}", o.toString()); + assertEquals("{\"a\": {\"a1\": null, \"a2\": true}}", o.toString()); } @Test - public void testArrayBackedByteBuffer() throws IOException { + void arrayBackedByteBuffer() throws IOException { ByteBuffer buffer = ByteBuffer.wrap(someBytes(EXAMPLE_DATA_SIZE)); testWithBuffer(buffer); } @Test - public void testMappedByteBuffer() throws IOException { - Path file = Paths.get(DIR.getRoot().getPath() + "testMappedByteBuffer.avro"); + void mappedByteBuffer() throws IOException { + Path file = dataDir.resolve("testMappedByteBuffer.avro"); Files.write(file, someBytes(EXAMPLE_DATA_SIZE)); MappedByteBuffer buffer = FileChannel.open(file, StandardOpenOption.READ).map(FileChannel.MapMode.READ_ONLY, 0, EXAMPLE_DATA_SIZE); @@ -311,7 +339,7 @@ private String fromAvroToJson(byte[] avroBytes, Schema schema, boolean includeNa DatumWriter writer = new GenericDatumWriter<>(schema); ByteArrayOutputStream output = new ByteArrayOutputStream(); - JsonEncoder encoder = factory.jsonEncoder(schema, output); + JsonEncoder encoder = FACTORY.jsonEncoder(schema, output); encoder.setIncludeNamespace(includeNamespace); Decoder decoder = DecoderFactory.get().binaryDecoder(avroBytes, null); Object datum = reader.read(null, decoder); @@ -321,4 +349,35 @@ private String fromAvroToJson(byte[] avroBytes, Schema schema, boolean includeNa return new String(output.toByteArray(), StandardCharsets.UTF_8.name()); } + + @Test + public void testJsonEncoderInitAutoFlush() throws IOException { + Schema s = Schema.create(Type.INT); + OutputStream baos = new ByteArrayOutputStream(); + OutputStream out = new BufferedOutputStream(baos); + JsonEncoder enc = FACTORY.jsonEncoder(s, out, false); + enc.configure(out, false); + enc.writeInt(24); + enc.flush(); + assertEquals("", baos.toString()); + out.flush(); + assertEquals("24", baos.toString()); + } + + @Test + public void testJsonEncoderInitAutoFlushDisabled() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + OutputStream out = new BufferedOutputStream(baos); + Schema ints = Schema.create(Type.INT); + Encoder e = FACTORY.jsonEncoder(ints, out, false, false); + String separator = System.getProperty("line.separator"); + GenericDatumWriter writer = new GenericDatumWriter(ints); + writer.write(1, e); + writer.write(2, e); + e.flush(); + assertEquals("", baos.toString()); + out.flush(); + assertEquals("1" + separator + "2", baos.toString()); + out.close(); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestFastReaderBuilderClassLoading.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestFastReaderBuilderClassLoading.java new file mode 100644 index 00000000000..2feedaedaaf --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestFastReaderBuilderClassLoading.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.io; + +import static org.junit.jupiter.api.Assertions.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.net.URI; +import java.util.Collections; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.util.ClassSecurityValidator; +import org.apache.avro.util.ClassSecurityValidator.ClassSecurityPredicate; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Tests that FastReaderBuilder.findClass() routes class loading through + * ClassUtils.forName(), so that ClassSecurityValidator is applied consistently. + */ +public class TestFastReaderBuilderClassLoading { + + private static final String TEST_VALUE = "https://example.com"; + + private ClassSecurityPredicate originalValidator; + + @BeforeEach + public void saveValidator() { + originalValidator = ClassSecurityValidator.getGlobal(); + } + + @AfterEach + public void restoreValidator() { + ClassSecurityValidator.setGlobal(originalValidator); + } + + /** + * When the validator blocks a class referenced by java-class, FastReaderBuilder + * must propagate the SecurityException so the caller knows why the class was + * rejected. + */ + @Test + void blockedClassThrowsSecurityException() { + // Block java.net.URI + ClassSecurityValidator.setGlobal(ClassSecurityValidator.composite(ClassSecurityValidator.DEFAULT_TRUSTED_CLASSES, + ClassSecurityValidator.builder().add("org.apache.avro.util.Utf8").build())); + + assertThrows(SecurityException.class, () -> readWithJavaClass("java.net.URI"), + "Blocked class should cause a SecurityException to propagate"); + } + + /** + * When the validator trusts a class referenced by java-class, FastReaderBuilder + * should instantiate it normally. + */ + @Test + void trustedClassIsInstantiated() { + ClassSecurityValidator.setGlobal(ClassSecurityValidator.composite(ClassSecurityValidator.DEFAULT_TRUSTED_CLASSES, + ClassSecurityValidator.builder().add("java.net.URI").add("org.apache.avro.util.Utf8").build())); + + GenericRecord result = readWithJavaClass("java.net.URI"); + + assertInstanceOf(URI.class, result.get("value")); + assertEquals(URI.create(TEST_VALUE), result.get("value")); + } + + /** + * Encode a string, then read it back through FastReaderBuilder with the given + * java-class. + */ + private GenericRecord readWithJavaClass(String javaClass) { + try { + Schema stringSchema = Schema.create(Schema.Type.STRING); + stringSchema.addProp(SpecificData.CLASS_PROP, javaClass); + stringSchema.addProp(GenericData.STRING_PROP, GenericData.StringType.String.name()); + + Schema recordSchema = Schema.createRecord("TestRecord", null, "test", false); + recordSchema.setFields(Collections.singletonList(new Schema.Field("value", stringSchema, null, null))); + + // Encode + GenericRecord record = new GenericRecordBuilder(recordSchema).set("value", TEST_VALUE).build(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().binaryEncoder(out, null); + new GenericDatumWriter(recordSchema).write(record, encoder); + encoder.flush(); + + // Decode with fast reader enabled + GenericData data = new GenericData(); + data.setFastReaderEnabled(true); + GenericDatumReader reader = new GenericDatumReader<>(recordSchema, recordSchema, data); + return reader.read(null, DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(out.toByteArray()), null)); + } catch (IOException e) { + return fail("Unexpected IOException during encode/decode", e); + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java index 54fc4203080..95b8b02115d 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java @@ -17,37 +17,45 @@ */ package org.apache.avro.io; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.apache.avro.SchemaParser; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericRecord; -import org.junit.Assert; -import org.junit.Test; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; public class TestJsonDecoder { @Test - public void testInt() throws Exception { + void testInt() throws Exception { checkNumeric("int", 1); } @Test - public void testLong() throws Exception { + void testLong() throws Exception { checkNumeric("long", 1L); } @Test - public void testFloat() throws Exception { + void testFloat() throws Exception { checkNumeric("float", 1.0F); } @Test - public void testDouble() throws Exception { + void testDouble() throws Exception { checkNumeric("double", 1.0); } private void checkNumeric(String type, Object value) throws Exception { String def = "{\"type\":\"record\",\"name\":\"X\",\"fields\":" + "[{\"type\":\"" + type + "\",\"name\":\"n\"}]}"; - Schema schema = new Schema.Parser().parse(def); + Schema schema = SchemaParser.parseSingle(def); DatumReader reader = new GenericDatumReader<>(schema); String[] records = { "{\"n\":1}", "{\"n\":1.0}" }; @@ -55,7 +63,7 @@ private void checkNumeric(String type, Object value) throws Exception { for (String record : records) { Decoder decoder = DecoderFactory.get().jsonDecoder(schema, record); GenericRecord r = reader.read(null, decoder); - Assert.assertEquals(value, r.get("n")); + assertEquals(value, r.get("n")); } } @@ -63,16 +71,48 @@ private void checkNumeric(String type, Object value) throws Exception { // in schema, // it works. @Test - public void testReorderFields() throws Exception { + void reorderFields() throws Exception { String w = "{\"type\":\"record\",\"name\":\"R\",\"fields\":" + "[{\"type\":\"long\",\"name\":\"l\"}," + "{\"type\":{\"type\":\"array\",\"items\":\"int\"},\"name\":\"a\"}" + "]}"; - Schema ws = new Schema.Parser().parse(w); + Schema ws = SchemaParser.parseSingle(w); DecoderFactory df = DecoderFactory.get(); String data = "{\"a\":[1,2],\"l\":100}{\"l\": 200, \"a\":[1,2]}"; JsonDecoder in = df.jsonDecoder(ws, data); - Assert.assertEquals(100, in.readLong()); + assertEquals(100, in.readLong()); in.skipArray(); - Assert.assertEquals(200, in.readLong()); + assertEquals(200, in.readLong()); in.skipArray(); } + + @Test + void testIntWithError() throws IOException { + Schema schema = SchemaBuilder.builder("test").record("example").fields().requiredInt("id").endRecord(); + String record = "{ \"id\": -1.2 }"; + + GenericDatumReader reader = new GenericDatumReader<>(schema, schema); + JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, record); + Assertions.assertThrows(AvroTypeException.class, () -> reader.read(null, decoder)); + } + + @Test + void testIeee754SpecialCases() throws IOException { + String def = "{\"type\":\"record\",\"name\":\"X\",\"fields\": [" + "{\"type\":\"float\",\"name\":\"nanFloat\"}," + + "{\"type\":\"float\",\"name\":\"infinityFloat\"}," + + "{\"type\":\"float\",\"name\":\"negativeInfinityFloat\"}," + "{\"type\":\"double\",\"name\":\"nanDouble\"}," + + "{\"type\":\"double\",\"name\":\"infinityDouble\"}," + + "{\"type\":\"double\",\"name\":\"negativeInfinityDouble\"}" + "]}"; + Schema schema = SchemaParser.parseSingle(def); + DatumReader reader = new GenericDatumReader<>(schema); + + String record = "{\"nanFloat\":\"NaN\", \"infinityFloat\":\"Infinity\", \"negativeInfinityFloat\":\"-Infinity\", " + + "\"nanDouble\":\"NaN\", \"infinityDouble\":\"Infinity\", \"negativeInfinityDouble\":\"-Infinity\"}"; + Decoder decoder = DecoderFactory.get().jsonDecoder(schema, record); + GenericRecord r = reader.read(null, decoder); + assertEquals(Float.NaN, r.get("nanFloat")); + assertEquals(Float.POSITIVE_INFINITY, r.get("infinityFloat")); + assertEquals(Float.NEGATIVE_INFINITY, r.get("negativeInfinityFloat")); + assertEquals(Double.NaN, r.get("nanDouble")); + assertEquals(Double.POSITIVE_INFINITY, r.get("infinityDouble")); + assertEquals(Double.NEGATIVE_INFINITY, r.get("negativeInfinityDouble")); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java index c880d9fd55a..edc3b44722a 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java @@ -17,48 +17,35 @@ */ package org.apache.avro.io; +import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; +import org.apache.avro.io.TestValidatingIO.Encoding; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; -import java.util.Arrays; -import java.util.Collection; +import java.util.stream.Stream; -import org.apache.avro.Schema; -import org.apache.avro.io.TestValidatingIO.Encoding; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) public class TestResolvingIO { - protected final Encoding eEnc; - protected final int iSkipL; - protected final String sJsWrtSchm; - protected final String sWrtCls; - protected final String sJsRdrSchm; - protected final String sRdrCls; - - public TestResolvingIO(Encoding encoding, int skipLevel, String jsonWriterSchema, String writerCalls, - String jsonReaderSchema, String readerCalls) { - this.eEnc = encoding; - this.iSkipL = skipLevel; - this.sJsWrtSchm = jsonWriterSchema; - this.sWrtCls = writerCalls; - this.sJsRdrSchm = jsonReaderSchema; - this.sRdrCls = readerCalls; - } - - @Test - public void testIdentical() throws IOException { - performTest(eEnc, iSkipL, sJsWrtSchm, sWrtCls, sJsWrtSchm, sWrtCls); + @ParameterizedTest + @MethodSource("data2") + public void testIdentical(Encoding encoding, int skip, String jsonWriterSchema, String writerCalls, + String jsonReaderSchema, String readerCalls) throws IOException { + performTest(encoding, skip, jsonWriterSchema, writerCalls, jsonWriterSchema, writerCalls); } private static final int COUNT = 10; - @Test - public void testCompatible() throws IOException { - performTest(eEnc, iSkipL, sJsWrtSchm, sWrtCls, sJsRdrSchm, sRdrCls); + @ParameterizedTest + @MethodSource("data2") + public void testCompatible(Encoding encoding, int skip, String jsonWriterSchema, String writerCalls, + String jsonReaderSchema, String readerCalls) throws IOException { + performTest(encoding, skip, jsonWriterSchema, writerCalls, jsonReaderSchema, readerCalls); } private void performTest(Encoding encoding, int skipLevel, String jsonWriterSchema, String writerCalls, @@ -73,9 +60,9 @@ private void testOnce(String jsonWriterSchema, String writerCalls, String jsonRe Object[] values = TestValidatingIO.randomValues(writerCalls); Object[] expected = TestValidatingIO.randomValues(readerCalls); - Schema writerSchema = new Schema.Parser().parse(jsonWriterSchema); + Schema writerSchema = SchemaParser.parseSingle(jsonWriterSchema); byte[] bytes = TestValidatingIO.make(writerSchema, writerCalls, values, encoding); - Schema readerSchema = new Schema.Parser().parse(jsonReaderSchema); + Schema readerSchema = SchemaParser.parseSingle(jsonReaderSchema); TestValidatingIO.print(encoding, skipLevel, writerSchema, readerSchema, values, expected); check(writerSchema, readerSchema, bytes, readerCalls, expected, encoding, skipLevel); } @@ -100,9 +87,8 @@ static void check(Schema wsc, Schema rsc, byte[] bytes, String calls, Object[] v TestValidatingIO.check(msg, vi, calls, values, skipLevel); } - @Parameterized.Parameters - public static Collection data2() { - return Arrays.asList(TestValidatingIO.convertTo2dArray(encodings, skipLevels, testSchemas())); + public static Stream data2() { + return TestValidatingIO.convertTo2dStream(encodings, skipLevels, testSchemas()); } static Object[][] encodings = new Object[][] { { Encoding.BINARY }, { Encoding.BLOCKING_BINARY }, { Encoding.JSON } }; diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java index 8e3dc8e53d7..5589285ae14 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java @@ -17,53 +17,33 @@ */ package org.apache.avro.io; -import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; - import org.apache.avro.Schema; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -@RunWith(Parameterized.class) -public class TestResolvingIOResolving { - protected TestValidatingIO.Encoding eEnc; - protected final int iSkipL; - protected final String sJsWrtSchm; - protected final String sWrtCls; - protected final String sJsRdrSchm; - protected final String sRdrCls; +import org.apache.avro.SchemaParser; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; - protected final Object[] oaWrtVals; - protected final Object[] oaRdrVals; +import java.io.IOException; +import java.util.stream.Stream; - public TestResolvingIOResolving(TestValidatingIO.Encoding encoding, int skipLevel, String jsonWriterSchema, - String writerCalls, Object[] writerValues, String jsonReaderSchema, String readerCalls, Object[] readerValues) { - this.eEnc = encoding; - this.iSkipL = skipLevel; - this.sJsWrtSchm = jsonWriterSchema; - this.sWrtCls = writerCalls; - this.oaWrtVals = writerValues; - this.sJsRdrSchm = jsonReaderSchema; - this.sRdrCls = readerCalls; - this.oaRdrVals = readerValues; - } +public class TestResolvingIOResolving { - @Test - public void testResolving() throws IOException { - Schema writerSchema = new Schema.Parser().parse(sJsWrtSchm); - byte[] bytes = TestValidatingIO.make(writerSchema, sWrtCls, oaWrtVals, eEnc); - Schema readerSchema = new Schema.Parser().parse(sJsRdrSchm); - TestValidatingIO.print(eEnc, iSkipL, writerSchema, readerSchema, oaWrtVals, oaRdrVals); - TestResolvingIO.check(writerSchema, readerSchema, bytes, sRdrCls, oaRdrVals, eEnc, iSkipL); + @ParameterizedTest + @MethodSource("data3") + public void testResolving(TestValidatingIO.Encoding encoding, int skipLevel, String jsonWriterSchema, + String writerCalls, Object[] writerValues, String jsonReaderSchema, String readerCalls, Object[] readerValues) + throws IOException { + Schema writerSchema = SchemaParser.parseSingle(jsonWriterSchema); + byte[] bytes = TestValidatingIO.make(writerSchema, writerCalls, writerValues, encoding); + Schema readerSchema = SchemaParser.parseSingle(jsonReaderSchema); + TestValidatingIO.print(encoding, skipLevel, writerSchema, readerSchema, writerValues, readerValues); + TestResolvingIO.check(writerSchema, readerSchema, bytes, readerCalls, readerValues, encoding, skipLevel); } - @Parameterized.Parameters - public static Collection data3() { - Collection ret = Arrays.asList(TestValidatingIO.convertTo2dArray(TestResolvingIO.encodings, - TestResolvingIO.skipLevels, dataForResolvingTests())); - return ret; + public static Stream data3() { + return TestValidatingIO.convertTo2dStream(TestResolvingIO.encodings, TestResolvingIO.skipLevels, + dataForResolvingTests()); } private static Object[][] dataForResolvingTests() { @@ -101,7 +81,7 @@ private static Object[][] dataForResolvingTests() { "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + "{\"name\": \"g1\", " + "\"type\":{\"type\":\"record\",\"name\":\"inner\",\"fields\":[" + "{\"name\":\"f1\", \"type\":\"int\", \"default\": 101}," + "{\"name\":\"f2\", \"type\":\"int\"}]}}, " - + "{\"name\": \"g2\", \"type\": \"long\"}]}}", + + "{\"name\": \"g2\", \"type\": \"long\"}]}", "RRIIL", new Object[] { 10, 101, 11L } }, // Default value for a record. { "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + "{\"name\": \"g2\", \"type\": \"long\"}]}", "L", diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java index 3056d5430af..c5eb2ab6504 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java @@ -17,9 +17,16 @@ */ package org.apache.avro.io; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; +import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; +import org.apache.avro.util.Utf8; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -27,20 +34,14 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Random; -import org.apache.avro.Schema; -import org.apache.avro.util.Utf8; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; -@RunWith(Parameterized.class) public class TestValidatingIO { enum Encoding { BINARY, BLOCKING_BINARY, JSON, @@ -48,30 +49,19 @@ enum Encoding { private static final Logger LOG = LoggerFactory.getLogger(TestValidatingIO.class); - private Encoding eEnc; - private int iSkipL; - private String sJsSch; - private String sCl; - - public TestValidatingIO(Encoding enc, int skip, String js, String cls) { - this.eEnc = enc; - this.iSkipL = skip; - this.sJsSch = js; - this.sCl = cls; - } - private static final int COUNT = 1; - @Test - public void testMain() throws IOException { + @ParameterizedTest + @MethodSource("data") + public void testMain(Encoding enc, int skip, String js, String cls) throws IOException { for (int i = 0; i < COUNT; i++) { - testOnce(new Schema.Parser().parse(sJsSch), sCl, iSkipL, eEnc); + testOnce(SchemaParser.parseSingle(js), cls, skip, enc); } } private void testOnce(Schema schema, String calls, int skipLevel, Encoding encoding) throws IOException { Object[] values = randomValues(calls); - print(eEnc, iSkipL, schema, schema, values, values); + print(encoding, skipLevel, schema, schema, values, values); byte[] bytes = make(schema, calls, values, encoding); check(schema, bytes, calls, values, skipLevel, encoding); } @@ -204,7 +194,7 @@ public static void generate(Encoder vw, String calls, Object[] values) throws IO break; } default: - fail(); + Assertions.fail(); break; } } @@ -254,7 +244,7 @@ public static Object[] randomValues(String calls) { case 's': break; default: - fail(); + Assertions.fail(); break; } } @@ -324,25 +314,25 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, vi.readNull(); break; case 'B': - assertEquals(msg, values[p++], vi.readBoolean()); + Assertions.assertEquals(values[p++], vi.readBoolean(), msg); break; case 'I': - assertEquals(msg, values[p++], vi.readInt()); + Assertions.assertEquals(values[p++], vi.readInt(), msg); break; case 'L': - assertEquals(msg, values[p++], vi.readLong()); + Assertions.assertEquals(values[p++], vi.readLong(), msg); break; case 'F': if (!(values[p] instanceof Float)) - fail(); + Assertions.fail(); float f = (Float) values[p++]; - assertEquals(msg, f, vi.readFloat(), Math.abs(f / 1000)); + Assertions.assertEquals(f, vi.readFloat(), Math.abs(f / 1000)); break; case 'D': if (!(values[p] instanceof Double)) - fail(); + Assertions.fail(); double d = (Double) values[p++]; - assertEquals(msg, d, vi.readDouble(), Math.abs(d / 1000)); + Assertions.assertEquals(d, vi.readDouble(), Math.abs(d / 1000), msg); break; case 'S': extractInt(cs); @@ -351,7 +341,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, p++; } else { String s = (String) values[p++]; - assertEquals(msg, new Utf8(s), vi.readString(null)); + Assertions.assertEquals(new Utf8(s), vi.readString(null), msg); } break; case 'K': @@ -361,7 +351,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, p++; } else { String s = (String) values[p++]; - assertEquals(msg, new Utf8(s), vi.readString(null)); + Assertions.assertEquals(new Utf8(s), vi.readString(null), msg); } break; case 'b': @@ -374,7 +364,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, ByteBuffer bb2 = vi.readBytes(null); byte[] actBytes = new byte[bb2.remaining()]; System.arraycopy(bb2.array(), bb2.position(), actBytes, 0, bb2.remaining()); - assertArrayEquals(msg, bb, actBytes); + Assertions.assertArrayEquals(bb, actBytes, msg); } break; case 'f': { @@ -386,7 +376,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, byte[] bb = (byte[]) values[p++]; byte[] actBytes = new byte[len]; vi.readFixed(actBytes); - assertArrayEquals(msg, bb, actBytes); + Assertions.assertArrayEquals(bb, actBytes, msg); } } break; @@ -395,7 +385,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, if (level == skipLevel) { vi.readEnum(); } else { - assertEquals(msg, e, vi.readEnum()); + Assertions.assertEquals(e, vi.readEnum(), msg); } } break; @@ -422,16 +412,16 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, continue; } case ']': - assertEquals(msg, 0, counts[level]); + Assertions.assertEquals(0, counts[level], msg); if (!isEmpty[level]) { - assertEquals(msg, 0, vi.arrayNext()); + Assertions.assertEquals(0, vi.arrayNext(), msg); } level--; break; case '}': - assertEquals(0, counts[level]); + Assertions.assertEquals(0, counts[level]); if (!isEmpty[level]) { - assertEquals(msg, 0, vi.mapNext()); + Assertions.assertEquals(0, vi.mapNext(), msg); } level--; break; @@ -450,28 +440,28 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, continue; case 'U': { int idx = extractInt(cs); - assertEquals(msg, idx, vi.readIndex()); + Assertions.assertEquals(idx, vi.readIndex(), msg); continue; } case 'R': ((ResolvingDecoder) vi).readFieldOrder(); continue; default: - fail(msg); + Assertions.fail(msg); } } catch (RuntimeException e) { throw new RuntimeException(msg, e); } } - assertEquals(msg, values.length, p); + Assertions.assertEquals(values.length, p, msg); } private static int skip(String msg, InputScanner cs, Decoder vi, boolean isArray) throws IOException { final char end = isArray ? ']' : '}'; if (isArray) { - assertEquals(msg, 0, vi.skipArray()); + Assertions.assertEquals(0, vi.skipArray(), msg); } else if (end == '}') { - assertEquals(msg, 0, vi.skipMap()); + Assertions.assertEquals(0, vi.skipMap(), msg); } int level = 0; int p = 0; @@ -507,9 +497,8 @@ private static int skip(String msg, InputScanner cs, Decoder vi, boolean isArray throw new RuntimeException("Don't know how to skip"); } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(convertTo2dArray(encodings, skipLevels, testSchemas())); + public static Stream data() { + return convertTo2dStream(encodings, skipLevels, testSchemas()); } private static Object[][] encodings = new Object[][] { { Encoding.BINARY }, { Encoding.BLOCKING_BINARY }, @@ -517,19 +506,11 @@ public static Collection data() { private static Object[][] skipLevels = new Object[][] { { -1 }, { 0 }, { 1 }, { 2 }, }; - public static Object[][] convertTo2dArray(final Object[][]... values) { - ArrayList ret = new ArrayList<>(); - + public static Stream convertTo2dStream(final Object[][]... values) { Iterator iter = cartesian(values); - while (iter.hasNext()) { - Object[] objects = iter.next(); - ret.add(objects); - } - Object[][] retArrays = new Object[ret.size()][]; - for (int i = 0; i < ret.size(); i++) { - retArrays[i] = ret.get(i); - } - return retArrays; + Stream stream = StreamSupport.stream(Spliterators.spliteratorUnknownSize(iter, Spliterator.ORDERED), + false); + return stream.map(Arguments::of); } /** diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/SymbolTest.java b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/SymbolTest.java index c7d0213e61c..3321ba7b859 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/SymbolTest.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/SymbolTest.java @@ -15,14 +15,15 @@ */ package org.apache.avro.io.parsing; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.util.HashSet; import java.util.Set; import org.apache.avro.Schema; -import org.junit.Test; +import org.apache.avro.SchemaParser; +import org.junit.jupiter.api.Test; /** * Unit test to verify that recursive schemas are flattened correctly. See @@ -41,8 +42,8 @@ public class SymbolTest { + " ]}},\n" + " {\"name\":\"node\",\"type\":\"SampleNode\"}]}}}]}"; @Test - public void testSomeMethod() throws IOException { - Schema schema = new Schema.Parser().parse(SCHEMA); + void someMethod() throws IOException { + Schema schema = SchemaParser.parseSingle(SCHEMA); Symbol root = new ResolvingGrammarGenerator().generate(schema, schema); validateNonNull(root, new HashSet<>()); } @@ -57,8 +58,7 @@ private static void validateNonNull(final Symbol symb, Set seen) { for (Symbol s : symb.production) { if (s == null) { fail("invalid parsing tree should not contain nulls"); - } - if (s.kind != Symbol.Kind.ROOT) { + } else if (s.kind != Symbol.Kind.ROOT) { validateNonNull(s, seen); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java index 3587055b96d..71f128dab65 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java @@ -21,8 +21,10 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.StringReader; +import java.io.UncheckedIOException; import java.util.Arrays; import java.util.Collection; +import java.util.stream.Stream; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.JsonNode; @@ -30,6 +32,7 @@ import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; +import org.apache.avro.SchemaParser; import org.apache.avro.file.DataFileStream; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericData; @@ -38,26 +41,21 @@ import org.apache.avro.generic.GenericRecordBuilder; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -@RunWith(Parameterized.class) -public class TestResolvingGrammarGenerator { - private final Schema schema; - private final JsonNode data; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; - public TestResolvingGrammarGenerator(String jsonSchema, String jsonData) throws IOException { - this.schema = new Schema.Parser().parse(jsonSchema); - JsonFactory factory = new JsonFactory(); - ObjectMapper mapper = new ObjectMapper(factory); +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_NAMESPACE_1; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_NAMESPACE_2; - this.data = mapper.readTree(new StringReader(jsonData)); - } +public class TestResolvingGrammarGenerator { - @Test - public void test() throws IOException { + @ParameterizedTest + @MethodSource("data") + void test(Schema schema, JsonNode data) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); EncoderFactory factory = EncoderFactory.get(); Encoder e = factory.validatingEncoder(schema, factory.binaryEncoder(baos, null)); @@ -67,7 +65,7 @@ public void test() throws IOException { } @Test - public void testRecordMissingRequiredFieldError() throws Exception { + void recordMissingRequiredFieldError() throws Exception { Schema schemaWithoutField = SchemaBuilder.record("MyRecord").namespace("ns").fields().name("field1").type() .stringType().noDefault().endRecord(); Schema schemaWithField = SchemaBuilder.record("MyRecord").namespace("ns").fields().name("field1").type() @@ -76,27 +74,51 @@ public void testRecordMissingRequiredFieldError() throws Exception { byte[] data = writeRecord(schemaWithoutField, record); try { readRecord(schemaWithField, data); - Assert.fail("Expected exception not thrown"); + Assertions.fail("Expected exception not thrown"); } catch (AvroTypeException typeException) { - Assert.assertEquals("Incorrect exception message", - "Found ns.MyRecord, expecting ns.MyRecord, missing required field field2", typeException.getMessage()); + Assertions.assertEquals("Found ns.MyRecord, expecting ns.MyRecord, missing required field field2", + typeException.getMessage(), "Incorrect exception message"); } } - @Parameterized.Parameters - public static Collection data() { - Collection ret = Arrays.asList(new Object[][] { + @Test + void differingEnumNamespaces() throws Exception { + Schema schema1 = SchemaBuilder.record("MyRecord").fields().name("field").type(ENUM1_AB_SCHEMA_NAMESPACE_1) + .noDefault().endRecord(); + Schema schema2 = SchemaBuilder.record("MyRecord").fields().name("field").type(ENUM1_AB_SCHEMA_NAMESPACE_2) + .noDefault().endRecord(); + GenericData.EnumSymbol genericEnumSymbol = new GenericData.EnumSymbol(ENUM1_AB_SCHEMA_NAMESPACE_1, "A"); + GenericData.Record record = new GenericRecordBuilder(schema1).set("field", genericEnumSymbol).build(); + byte[] data = writeRecord(schema1, record); + Assertions.assertEquals(genericEnumSymbol, readRecord(schema1, data).get("field")); + Assertions.assertEquals(genericEnumSymbol, readRecord(schema2, data).get("field")); + } + + public static Stream data() { + Collection ret = Arrays.asList(new String[][] { { "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + " { \"name\" : \"f1\", \"type\": \"int\" }, " - + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] } }", "{ \"f2\": 10.4, \"f1\": 10 } " }, - { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] } }", " \"s1\" " }, - { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] } }", " \"s2\" " }, + + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] }", "{ \"f2\": 10.4, \"f1\": 10 } " }, + { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] }", " \"s1\" " }, + { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] }", " \"s2\" " }, { "{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", "\"hello\"" }, { "{ \"type\": \"array\", \"items\": \"int\" }", "[ 10, 20, 30 ]" }, { "{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": 10, \"k3\": 20, \"k3\": 30 }" }, { "[ \"int\", \"long\" ]", "10" }, { "\"string\"", "\"hello\"" }, { "\"bytes\"", "\"hello\"" }, { "\"int\"", "10" }, { "\"long\"", "10" }, { "\"float\"", "10.0" }, { "\"double\"", "10.0" }, { "\"boolean\"", "true" }, { "\"boolean\"", "false" }, { "\"null\"", "null" }, }); - return ret; + + final JsonFactory factory = new JsonFactory(); + final ObjectMapper mapper = new ObjectMapper(factory); + + return ret.stream().map((String[] args) -> { + Schema schema = SchemaParser.parseSingle(args[0]); + try { + JsonNode data = mapper.readTree(new StringReader(args[1])); + return Arguments.of(schema, data); + } catch (IOException ex) { + throw new UncheckedIOException(ex); + } + }); } private byte[] writeRecord(Schema schema, GenericData.Record record) throws Exception { diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java index fc698014d53..23348cb09ae 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java @@ -24,6 +24,7 @@ import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; +import org.apache.avro.SchemaParser; import org.apache.avro.SchemaValidationException; import org.apache.avro.SchemaValidatorBuilder; import org.apache.avro.generic.GenericData; @@ -32,17 +33,17 @@ import org.apache.avro.io.DatumReader; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.not; +import static org.junit.jupiter.api.Assertions.*; /** ResolvingGrammarGenerator tests that are not Parameterized. */ public class TestResolvingGrammarGenerator2 { @Test - public void testFixed() throws java.io.IOException { + void fixed() throws java.io.IOException { new ResolvingGrammarGenerator().generate(Schema.createFixed("MyFixed", null, null, 10), Schema.create(Schema.Type.BYTES)); new ResolvingGrammarGenerator().generate(Schema.create(Schema.Type.BYTES), @@ -63,69 +64,71 @@ public void testFixed() throws java.io.IOException { Schema point3dMatchName = SchemaBuilder.record("Point").fields().requiredDouble("x").requiredDouble("y").name("z") .type().doubleType().doubleDefault(0.0).endRecord(); - @Test(expected = SchemaValidationException.class) - public void testUnionResolutionNoStructureMatch() throws Exception { - // there is a short name match, but the structure does not match - Schema read = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), point3dNoDefault)); - - new SchemaValidatorBuilder().canBeReadStrategy().validateAll().validate(point2dFullname, - Collections.singletonList(read)); + @Test + void unionResolutionNoStructureMatch() throws Exception { + assertThrows(SchemaValidationException.class, () -> { + // there is a short name match, but the structure does not match + Schema read = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), point3dNoDefault)); + + new SchemaValidatorBuilder().canBeReadStrategy().validateAll().validate(point2dFullname, + Collections.singletonList(read)); + }); } @Test - public void testUnionResolutionFirstStructureMatch2d() throws Exception { + void unionResolutionFirstStructureMatch2d() throws Exception { // multiple structure matches with no short or full name matches Schema read = Schema .createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), point3dNoDefault, point2d, point3d)); Symbol grammar = new ResolvingGrammarGenerator().generate(point2dFullname, read); - Assert.assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); + assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); Symbol.UnionAdjustAction action = (Symbol.UnionAdjustAction) grammar.production[1]; - Assert.assertEquals(2, action.rindex); + assertEquals(2, action.rindex); } @Test - public void testUnionResolutionFirstStructureMatch3d() throws Exception { + void unionResolutionFirstStructureMatch3d() throws Exception { // multiple structure matches with no short or full name matches Schema read = Schema .createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), point3dNoDefault, point3d, point2d)); Symbol grammar = new ResolvingGrammarGenerator().generate(point2dFullname, read); - Assert.assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); + assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); Symbol.UnionAdjustAction action = (Symbol.UnionAdjustAction) grammar.production[1]; - Assert.assertEquals(2, action.rindex); + assertEquals(2, action.rindex); } @Test - public void testUnionResolutionNamedStructureMatch() throws Exception { + void unionResolutionNamedStructureMatch() throws Exception { // multiple structure matches with a short name match Schema read = Schema .createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), point2d, point3dMatchName, point3d)); Symbol grammar = new ResolvingGrammarGenerator().generate(point2dFullname, read); - Assert.assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); + assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); Symbol.UnionAdjustAction action = (Symbol.UnionAdjustAction) grammar.production[1]; - Assert.assertEquals(2, action.rindex); + assertEquals(2, action.rindex); } @Test - public void testUnionResolutionFullNameMatch() throws Exception { + void unionResolutionFullNameMatch() throws Exception { // there is a full name match, so it should be chosen Schema read = Schema.createUnion( Arrays.asList(Schema.create(Schema.Type.NULL), point2d, point3dMatchName, point3d, point2dFullname)); Symbol grammar = new ResolvingGrammarGenerator().generate(point2dFullname, read); - Assert.assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); + assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); Symbol.UnionAdjustAction action = (Symbol.UnionAdjustAction) grammar.production[1]; - Assert.assertEquals(4, action.rindex); + assertEquals(4, action.rindex); } @Test - public void testAvro2702StringProperties() throws IOException { + void avro2702StringProperties() throws IOException { // Create a nested record schema with string fields at two levels. Schema inner = SchemaBuilder.builder().record("B").fields().requiredString("b1").endRecord(); @@ -133,7 +136,7 @@ public void testAvro2702StringProperties() throws IOException { .nullType().and().type(inner).endUnion().noDefault().endRecord(); // Make a copy with the two string fields annotated. - Schema outer2 = new Schema.Parser().parse(outer.toString()); + Schema outer2 = SchemaParser.parseSingle(outer.toString()); outer2.getField("a1").schema().addProp(GenericData.STRING_PROP, "String"); Schema inner2 = outer2.getField("inner").schema().getTypes().get(1); inner2.getField("b1").schema().addProp(GenericData.STRING_PROP, "String"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/message/TestBinaryMessageEncoding.java b/lang/java/avro/src/test/java/org/apache/avro/message/TestBinaryMessageEncoding.java index 7c6bf1a180b..0e8583bf6c2 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/message/TestBinaryMessageEncoding.java +++ b/lang/java/avro/src/test/java/org/apache/avro/message/TestBinaryMessageEncoding.java @@ -19,6 +19,8 @@ package org.apache.avro.message; +import static org.junit.jupiter.api.Assertions.*; + import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -31,8 +33,7 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.Record; import org.apache.avro.generic.GenericRecordBuilder; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestBinaryMessageEncoding { private static final Schema SCHEMA_V1 = SchemaBuilder.record("TestRecord").fields().requiredInt("id") @@ -56,18 +57,18 @@ public class TestBinaryMessageEncoding { V2_BUILDER.set("id", 8L).set("message", "m-8").set("data", 35.6).build()); @Test - public void testByteBufferRoundTrip() throws Exception { + void byteBufferRoundTrip() throws Exception { MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); Record copy = decoder.decode(encoder.encode(V2_RECORDS.get(0))); - Assert.assertNotSame("Copy should not be the same object", copy, V2_RECORDS.get(0)); - Assert.assertEquals("Record should be identical after round-trip", V2_RECORDS.get(0), copy); + assertNotSame(copy, V2_RECORDS.get(0), "Copy should not be the same object"); + assertEquals(V2_RECORDS.get(0), copy, "Record should be identical after round-trip"); } @Test - public void testSchemaEvolution() throws Exception { + void schemaEvolution() throws Exception { List buffers = new ArrayList<>(); List records = new ArrayList<>(); @@ -100,21 +101,23 @@ public void testSchemaEvolution() throws Exception { decodedUsingV2.add(v2Decoder.decode(buffer)); } - Assert.assertEquals(allAsV2, decodedUsingV2); + assertEquals(allAsV2, decodedUsingV2); } - @Test(expected = MissingSchemaException.class) - public void testCompatibleReadFailsWithoutSchema() throws Exception { - MessageEncoder v1Encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); - BinaryMessageDecoder v2Decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void compatibleReadFailsWithoutSchema() throws Exception { + assertThrows(MissingSchemaException.class, () -> { + MessageEncoder v1Encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); + BinaryMessageDecoder v2Decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer v1Buffer = v1Encoder.encode(V1_RECORDS.get(3)); + ByteBuffer v1Buffer = v1Encoder.encode(V1_RECORDS.get(3)); - v2Decoder.decode(v1Buffer); + v2Decoder.decode(v1Buffer); + }); } @Test - public void testCompatibleReadWithSchema() throws Exception { + void compatibleReadWithSchema() throws Exception { MessageEncoder v1Encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); BinaryMessageDecoder v2Decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); v2Decoder.addSchema(SCHEMA_V1); @@ -123,11 +126,11 @@ public void testCompatibleReadWithSchema() throws Exception { Record record = v2Decoder.decode(v1Buffer); - Assert.assertEquals(V2_BUILDER.set("id", 6L).set("message", "m-6").clear("data").build(), record); + assertEquals(V2_BUILDER.set("id", 6L).set("message", "m-6").clear("data").build(), record); } @Test - public void testCompatibleReadWithSchemaFromLookup() throws Exception { + void compatibleReadWithSchemaFromLookup() throws Exception { MessageEncoder v1Encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); SchemaStore.Cache schemaCache = new SchemaStore.Cache(); @@ -138,11 +141,11 @@ public void testCompatibleReadWithSchemaFromLookup() throws Exception { Record record = v2Decoder.decode(v1Buffer); - Assert.assertEquals(V2_BUILDER.set("id", 4L).set("message", "m-4").clear("data").build(), record); + assertEquals(V2_BUILDER.set("id", 4L).set("message", "m-4").clear("data").build(), record); } @Test - public void testIdenticalReadWithSchemaFromLookup() throws Exception { + void identicalReadWithSchemaFromLookup() throws Exception { MessageEncoder v1Encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); SchemaStore.Cache schemaCache = new SchemaStore.Cache(); @@ -155,11 +158,11 @@ public void testIdenticalReadWithSchemaFromLookup() throws Exception { Record record = genericDecoder.decode(v1Buffer); - Assert.assertEquals(V1_RECORDS.get(2), record); + assertEquals(V1_RECORDS.get(2), record); } @Test - public void testBufferReuse() throws Exception { + void bufferReuse() throws Exception { // This test depends on the serialized version of record 1 being smaller or // the same size as record 0 so that the reused ByteArrayOutputStream won't // expand its internal buffer. @@ -168,81 +171,91 @@ public void testBufferReuse() throws Exception { ByteBuffer b0 = encoder.encode(V1_RECORDS.get(0)); ByteBuffer b1 = encoder.encode(V1_RECORDS.get(1)); - Assert.assertEquals(b0.array(), b1.array()); + assertEquals(b0.array(), b1.array()); MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V1); - Assert.assertEquals("Buffer was reused, decode(b0) should be record 1", V1_RECORDS.get(1), decoder.decode(b0)); + assertEquals(V1_RECORDS.get(1), decoder.decode(b0), "Buffer was reused, decode(b0) should be record 1"); } @Test - public void testBufferCopy() throws Exception { + void bufferCopy() throws Exception { MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); ByteBuffer b0 = encoder.encode(V1_RECORDS.get(0)); ByteBuffer b1 = encoder.encode(V1_RECORDS.get(1)); - Assert.assertNotEquals(b0.array(), b1.array()); + assertNotEquals(b0.array(), b1.array()); MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V1); // bytes are not changed by reusing the encoder - Assert.assertEquals("Buffer was copied, decode(b0) should be record 0", V1_RECORDS.get(0), decoder.decode(b0)); + assertEquals(V1_RECORDS.get(0), decoder.decode(b0), "Buffer was copied, decode(b0) should be record 0"); } - @Test(expected = AvroRuntimeException.class) - public void testByteBufferMissingPayload() throws Exception { - MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); - MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void byteBufferMissingPayload() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); + MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); + ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); - buffer.limit(12); + buffer.limit(12); - decoder.decode(buffer); + decoder.decode(buffer); + }); } - @Test(expected = BadHeaderException.class) - public void testByteBufferMissingFullHeader() throws Exception { - MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); - MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void byteBufferMissingFullHeader() throws Exception { + assertThrows(BadHeaderException.class, () -> { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); + MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); + ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); - buffer.limit(8); + buffer.limit(8); - decoder.decode(buffer); + decoder.decode(buffer); + }); } - @Test(expected = BadHeaderException.class) - public void testByteBufferBadMarkerByte() throws Exception { - MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); - MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void byteBufferBadMarkerByte() throws Exception { + assertThrows(BadHeaderException.class, () -> { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); + MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); - buffer.array()[0] = 0x00; + ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); + buffer.array()[0] = 0x00; - decoder.decode(buffer); + decoder.decode(buffer); + }); } - @Test(expected = BadHeaderException.class) - public void testByteBufferBadVersionByte() throws Exception { - MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); - MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void byteBufferBadVersionByte() throws Exception { + assertThrows(BadHeaderException.class, () -> { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); + MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); - buffer.array()[1] = 0x00; + ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); + buffer.array()[1] = 0x00; - decoder.decode(buffer); + decoder.decode(buffer); + }); } - @Test(expected = MissingSchemaException.class) - public void testByteBufferUnknownSchema() throws Exception { - MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); - MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void byteBufferUnknownSchema() throws Exception { + assertThrows(MissingSchemaException.class, () -> { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); + MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); - buffer.array()[4] = 0x00; + ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); + buffer.array()[4] = 0x00; - decoder.decode(buffer); + decoder.decode(buffer); + }); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/message/TestGenerateInteropSingleObjectEncoding.java b/lang/java/avro/src/test/java/org/apache/avro/message/TestGenerateInteropSingleObjectEncoding.java new file mode 100644 index 00000000000..27bb0920ae5 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/message/TestGenerateInteropSingleObjectEncoding.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.avro.message; + +import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecordBuilder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; + +/** + * Generates test_message.bin - a single + * object encoded Avro message. + */ +public class TestGenerateInteropSingleObjectEncoding { + private static final String RESOURCES_FOLDER = System.getProperty("share.dir", "target/test-classes/share") + + "/test/data/messageV1"; + private static final File SCHEMA_FILE = new File(RESOURCES_FOLDER + "/test_schema.avsc"); + private static final File MESSAGE_FILE = new File(RESOURCES_FOLDER + "/test_message.bin"); + private static Schema SCHEMA; + private static GenericRecordBuilder BUILDER; + + @BeforeAll + public static void setup() throws IOException { + try (FileInputStream fileInputStream = new FileInputStream(SCHEMA_FILE)) { + SCHEMA = new SchemaParser().parse(fileInputStream).mainSchema(); + BUILDER = new GenericRecordBuilder(SCHEMA); + } + } + + @Test + void generateData() throws IOException { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA); + BUILDER.set("id", 42L).set("name", "Bill").set("tags", Arrays.asList("dog_lover", "cat_hater")).build(); + ByteBuffer buffer = encoder.encode( + BUILDER.set("id", 42L).set("name", "Bill").set("tags", Arrays.asList("dog_lover", "cat_hater")).build()); + new FileOutputStream(MESSAGE_FILE).write(buffer.array()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/message/TestInteropSingleObjectEncoding.java b/lang/java/avro/src/test/java/org/apache/avro/message/TestInteropSingleObjectEncoding.java new file mode 100644 index 00000000000..09dd9f14f8e --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/message/TestInteropSingleObjectEncoding.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.avro.message; + +import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecordBuilder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.File; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.util.Arrays; + +/** + * Tests that test_message.bin is properly encoded single + * object + */ +public class TestInteropSingleObjectEncoding { + private static final String RESOURCES_FOLDER = System.getProperty("share.dir", "target/test-classes/share") + + "/test/data/messageV1"; + private static final File SCHEMA_FILE = new File(RESOURCES_FOLDER + "/test_schema.avsc"); + private static final File MESSAGE_FILE = new File(RESOURCES_FOLDER + "/test_message.bin"); + private static Schema SCHEMA; + private static GenericRecordBuilder BUILDER; + + @BeforeAll + public static void setup() throws IOException { + try (FileInputStream fileInputStream = new FileInputStream(SCHEMA_FILE)) { + SCHEMA = new SchemaParser().parse(fileInputStream).mainSchema(); + BUILDER = new GenericRecordBuilder(SCHEMA); + } + } + + @Test + void checkSingleObjectEncoding() throws IOException { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA); + ByteBuffer buffer = encoder.encode( + BUILDER.set("id", 42L).set("name", "Bill").set("tags", Arrays.asList("dog_lover", "cat_hater")).build()); + byte[] fileBuffer = Files.readAllBytes(MESSAGE_FILE.toPath()); + assertArrayEquals(fileBuffer, buffer.array()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestAvroEncode.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestAvroEncode.java new file mode 100644 index 00000000000..7da7170e1b6 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestAvroEncode.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.reflect; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.avro.AvroTypeException; +import org.apache.avro.Schema; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.junit.jupiter.api.Test; + +public class TestAvroEncode { + EncoderFactory factory = new EncoderFactory(); + + @Test + void testWithinClass() throws IOException { + + var wrapper = new Wrapper(new R1("321")); + + var read = readWrite(wrapper); + + assertEquals("321", wrapper.getR1().getValue()); + assertEquals("321 used this", read.getR1().getValue()); + } + + @Test + void testDirect() throws IOException { + + var r1 = new R1("123"); + + var read = readWrite(r1); + + assertEquals("123", r1.getValue()); + assertEquals("123 used this", read.getValue()); + } + + @Test + void testFieldAnnotationTakesPrecedence() throws IOException { + + var wrapper = new OtherWrapper(new R1("test")); + + var read = readWrite(wrapper); + + assertEquals("test", wrapper.getR1().getValue()); + assertEquals("test used other", read.getR1().getValue()); + } + + public static class Wrapper { + + private R1 r1; + + public Wrapper() { + } + + public Wrapper(R1 r1) { + this.r1 = r1; + } + + public R1 getR1() { + return r1; + } + + public void setR1(R1 r1) { + this.r1 = r1; + } + + } + + public static class OtherWrapper { + @AvroEncode(using = R1EncodingOther.class) + private R1 r1; + + public OtherWrapper() { + } + + public OtherWrapper(R1 r1) { + this.r1 = r1; + } + + public R1 getR1() { + return r1; + } + + public void setR1(R1 r1) { + this.r1 = r1; + } + + } + + @AvroEncode(using = R1Encoding.class) + public static class R1 { + + private final String value; + + public R1(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + + } + + public static class R1Encoding extends CustomEncoding { + + { + schema = Schema.createRecord("R1", null, "org.apache.avro.reflect.TestAvroEncode", false, + Arrays.asList(new Schema.Field("value", Schema.create(Schema.Type.INT), null, null))); + } + + @Override + protected void write(Object datum, Encoder out) throws IOException { + if (datum instanceof R1) { + var value = ((R1) datum).getValue(); + out.writeInt(Integer.parseInt(value)); + } else { + throw new AvroTypeException("Expected R1, got " + datum.getClass()); + } + + } + + @Override + protected R1 read(Object reuse, Decoder in) throws IOException { + return new R1(in.readInt() + " used this"); + } + } + + public static class R1EncodingOther extends CustomEncoding { + + { + schema = Schema.createRecord("R1", null, null, false, + Arrays.asList(new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null))); + } + + @Override + protected void write(Object datum, Encoder out) throws IOException { + if (datum instanceof R1) { + out.writeString(((R1) datum).getValue()); + } else { + throw new AvroTypeException("Expected R1, got " + datum.getClass()); + } + } + + @Override + protected R1 read(Object reuse, Decoder in) throws IOException { + return new R1(in.readString() + " used other"); + } + } + + T readWrite(T object) throws IOException { + var schema = new ReflectData().getSchema(object.getClass()); + ReflectDatumWriter writer = new ReflectDatumWriter<>(schema); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.write(object, factory.directBinaryEncoder(out, null)); + ReflectDatumReader reader = new ReflectDatumReader<>(schema); + return reader.read(null, DecoderFactory.get().binaryDecoder(out.toByteArray(), null)); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestByteBuffer.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestByteBuffer.java index d0ef4312969..ae7869d1ccb 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestByteBuffer.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestByteBuffer.java @@ -19,9 +19,7 @@ package org.apache.avro.reflect; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.File; @@ -41,15 +39,14 @@ import org.apache.avro.file.FileReader; import org.apache.avro.file.SeekableByteArrayInput; import org.apache.avro.io.DatumWriter; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestByteBuffer { - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); + @TempDir + public File DIR; static class X { String name = ""; @@ -58,9 +55,9 @@ static class X { File content; - @Before + @BeforeEach public void before() throws IOException { - content = new File(DIR.getRoot().getPath(), "test-content"); + content = new File(DIR.getPath(), "test-content"); try (FileOutputStream out = new FileOutputStream(content)) { for (int i = 0; i < 100000; i++) { out.write("hello world\n".getBytes(UTF_8)); @@ -69,7 +66,7 @@ public void before() throws IOException { } @Test - public void test() throws Exception { + void test() throws Exception { Schema schema = ReflectData.get().getSchema(X.class); ByteArrayOutputStream bout = new ByteArrayOutputStream(); writeOneXAsAvro(schema, bout); @@ -77,7 +74,7 @@ public void test() throws Exception { String expected = getmd5(content); String actual = getmd5(record.content); - assertEquals("md5 for result differed from input", expected, actual); + assertEquals(expected, actual, "md5 for result differed from input"); } private X readOneXFromAvro(Schema schema, ByteArrayOutputStream bout) throws IOException { @@ -85,9 +82,9 @@ private X readOneXFromAvro(Schema schema, ByteArrayOutputStream bout) throws IOE ReflectDatumReader datumReader = new ReflectDatumReader<>(schema); FileReader reader = DataFileReader.openReader(input, datumReader); Iterator it = reader.iterator(); - assertTrue("missing first record", it.hasNext()); + assertTrue(it.hasNext(), "missing first record"); X record = it.next(); - assertFalse("should be no more records - only wrote one out", it.hasNext()); + assertFalse(it.hasNext(), "should be no more records - only wrote one out"); return record; } diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestNonStringMapKeys.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestNonStringMapKeys.java index 70cb7b65aa3..6b031fb2186 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestNonStringMapKeys.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestNonStringMapKeys.java @@ -18,10 +18,7 @@ package org.apache.avro.reflect; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -45,7 +42,7 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Test serialization and de-serialization of non-string map-keys @@ -53,7 +50,7 @@ public class TestNonStringMapKeys { @Test - public void testNonStringMapKeys() throws Exception { + void nonStringMapKeys() throws Exception { Company entityObj1 = buildCompany(); Company entityObj2 = buildCompany(); @@ -65,7 +62,7 @@ public void testNonStringMapKeys() throws Exception { GenericRecord record = records.get(0); Object employees = record.get("employees"); - assertTrue("Unable to read 'employees' map", employees instanceof GenericArray); + assertTrue(employees instanceof GenericArray, "Unable to read 'employees' map"); GenericArray arrayEmployees = ((GenericArray) employees); Object employeeRecord = arrayEmployees.get(0); assertTrue(employeeRecord instanceof GenericRecord); @@ -90,13 +87,13 @@ public void testNonStringMapKeys() throws Exception { } byte[] jsonBytes = testJsonEncoder(testType, entityObj1); - assertNotNull("Unable to serialize using jsonEncoder", jsonBytes); + assertNotNull(jsonBytes, "Unable to serialize using jsonEncoder"); GenericRecord jsonRecord = testJsonDecoder(testType, jsonBytes, entityObj1); - assertEquals("JSON decoder output not same as Binary Decoder", record, jsonRecord); + assertEquals(record, jsonRecord, "JSON decoder output not same as Binary Decoder"); } @Test - public void testNonStringMapKeysInNestedMaps() throws Exception { + void nonStringMapKeysInNestedMaps() throws Exception { Company2 entityObj1 = buildCompany2(); @@ -107,7 +104,7 @@ public void testNonStringMapKeysInNestedMaps() throws Exception { GenericRecord record = records.get(0); Object employees = record.get("employees"); - assertTrue("Unable to read 'employees' map", employees instanceof GenericArray); + assertTrue(employees instanceof GenericArray, "Unable to read 'employees' map"); GenericArray employeesMapArray = ((GenericArray) employees); Object employeeMapElement = employeesMapArray.get(0); @@ -146,13 +143,13 @@ public void testNonStringMapKeysInNestedMaps() throws Exception { } byte[] jsonBytes = testJsonEncoder(testType, entityObj1); - assertNotNull("Unable to serialize using jsonEncoder", jsonBytes); + assertNotNull(jsonBytes, "Unable to serialize using jsonEncoder"); GenericRecord jsonRecord = testJsonDecoder(testType, jsonBytes, entityObj1); - assertEquals("JSON decoder output not same as Binary Decoder", record, jsonRecord); + assertEquals(record, jsonRecord, "JSON decoder output not same as Binary Decoder"); } @Test - public void testRecordNameInvariance() throws Exception { + void recordNameInvariance() throws Exception { SameMapSignature entityObj1 = buildSameMapSignature(); @@ -163,7 +160,7 @@ public void testRecordNameInvariance() throws Exception { GenericRecord record = records.get(0); Object map1obj = record.get("map1"); - assertTrue("Unable to read map1", map1obj instanceof GenericArray); + assertTrue(map1obj instanceof GenericArray, "Unable to read map1"); GenericArray map1array = ((GenericArray) map1obj); Object map1element = map1array.get(0); @@ -207,10 +204,10 @@ public void testRecordNameInvariance() throws Exception { assertEquals(map1schema, map4schema); byte[] jsonBytes = testJsonEncoder(testType, entityObj1); - assertNotNull("Unable to serialize using jsonEncoder", jsonBytes); + assertNotNull(jsonBytes, "Unable to serialize using jsonEncoder"); GenericRecord jsonRecord = testJsonDecoder(testType, jsonBytes, entityObj1); - assertEquals("JSON decoder output not same as Binary Decoder", record.get("map1"), jsonRecord.get("map1")); - assertEquals("JSON decoder output not same as Binary Decoder", record.get("map2"), jsonRecord.get("map2")); + assertEquals(record.get("map1"), jsonRecord.get("map1"), "JSON decoder output not same as Binary Decoder"); + assertEquals(record.get("map2"), jsonRecord.get("map2"), "JSON decoder output not same as Binary Decoder"); } /** @@ -223,7 +220,7 @@ public byte[] testSerialization(String testType, T... entityObjs) throws Exc ReflectData rdata = ReflectData.AllowNull.get(); Schema schema = rdata.getSchema(entityObj1.getClass()); - assertNotNull("Unable to get schema for " + testType, schema); + assertNotNull(schema, "Unable to get schema for " + testType); log(schema.toString(true)); ReflectDatumWriter datumWriter = new ReflectDatumWriter(entityObj1.getClass(), rdata); @@ -252,7 +249,7 @@ private List testGenericDatumRead(String testType, byte[] byt try (DataFileReader fileReader = new DataFileReader<>(avroInputStream, datumReader)) { Schema schema = fileReader.getSchema(); - assertNotNull("Unable to get schema for " + testType, schema); + assertNotNull(schema, "Unable to get schema for " + testType); GenericRecord record = null; while (fileReader.hasNext()) { try { diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java index e3065d59b87..cf0e99756eb 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java @@ -20,9 +20,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.is; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -35,14 +33,19 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.Random; import org.apache.avro.AvroRuntimeException; import org.apache.avro.AvroTypeException; import org.apache.avro.JsonProperties; +import org.apache.avro.JsonSchemaParser; +import org.apache.avro.NameValidator; import org.apache.avro.Protocol; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.SchemaBuilder; +import org.apache.avro.SchemaParser; import org.apache.avro.generic.GenericData; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; @@ -50,7 +53,10 @@ import org.apache.avro.io.EncoderFactory; import org.apache.avro.reflect.TestReflect.SampleRecord.AnotherSampleRecord; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; +import org.junit.jupiter.api.condition.EnabledForJreRange; +import org.junit.jupiter.api.condition.JRE; public class TestReflect { @@ -58,87 +64,87 @@ public class TestReflect { // test primitive type inference @Test - public void testVoid() { + void testVoid() { check(Void.TYPE, "\"null\""); check(Void.class, "\"null\""); } @Test - public void testBoolean() { + void testBoolean() { check(Boolean.TYPE, "\"boolean\""); check(Boolean.class, "\"boolean\""); } @Test - public void testInt() { + void testInt() { check(Integer.TYPE, "\"int\""); check(Integer.class, "\"int\""); } @Test - public void testByte() { + void testByte() { check(Byte.TYPE, "{\"type\":\"int\",\"java-class\":\"java.lang.Byte\"}"); check(Byte.class, "{\"type\":\"int\",\"java-class\":\"java.lang.Byte\"}"); } @Test - public void testShort() { + void testShort() { check(Short.TYPE, "{\"type\":\"int\",\"java-class\":\"java.lang.Short\"}"); check(Short.class, "{\"type\":\"int\",\"java-class\":\"java.lang.Short\"}"); } @Test - public void testChar() { + void testChar() { check(Character.TYPE, "{\"type\":\"int\",\"java-class\":\"java.lang.Character\"}"); check(Character.class, "{\"type\":\"int\",\"java-class\":\"java.lang.Character\"}"); } @Test - public void testLong() { + void testLong() { check(Long.TYPE, "\"long\""); check(Long.class, "\"long\""); } @Test - public void testFloat() { + void testFloat() { check(Float.TYPE, "\"float\""); check(Float.class, "\"float\""); } @Test - public void testDouble() { + void testDouble() { check(Double.TYPE, "\"double\""); check(Double.class, "\"double\""); } @Test - public void testString() { + void string() { check("Foo", "\"string\""); } @Test - public void testBytes() { + void bytes() { check(ByteBuffer.allocate(0), "\"bytes\""); check(new byte[0], "{\"type\":\"bytes\",\"java-class\":\"[B\"}"); } @Test - public void testUnionWithCollection() { - Schema s = new Schema.Parser().parse("[\"null\", {\"type\":\"array\",\"items\":\"float\"}]"); + void unionWithCollection() { + Schema s = SchemaParser.parseSingle("[\"null\", {\"type\":\"array\",\"items\":\"float\"}]"); GenericData data = ReflectData.get(); assertEquals(1, data.resolveUnion(s, new ArrayList())); } @Test - public void testUnionWithMap() { - Schema s = new Schema.Parser().parse("[\"null\", {\"type\":\"map\",\"values\":\"float\"}]"); + void unionWithMap() { + Schema s = SchemaParser.parseSingle("[\"null\", {\"type\":\"map\",\"values\":\"float\"}]"); GenericData data = ReflectData.get(); assertEquals(1, data.resolveUnion(s, new HashMap())); } @Test - public void testUnionWithMapWithUtf8Keys() { - Schema s = new Schema.Parser().parse("[\"null\", {\"type\":\"map\",\"values\":\"float\"}]"); + void unionWithMapWithUtf8Keys() { + Schema s = SchemaParser.parseSingle("[\"null\", {\"type\":\"map\",\"values\":\"float\"}]"); GenericData data = ReflectData.get(); HashMap map = new HashMap<>(); map.put(new Utf8("foo"), 1.0f); @@ -146,24 +152,24 @@ public void testUnionWithMapWithUtf8Keys() { } @Test - public void testUnionWithFixed() { - Schema s = new Schema.Parser().parse("[\"null\", {\"type\":\"fixed\",\"name\":\"f\",\"size\":1}]"); - Schema f = new Schema.Parser().parse("{\"type\":\"fixed\",\"name\":\"f\",\"size\":1}"); + void unionWithFixed() { + Schema s = SchemaParser.parseSingle("[\"null\", {\"type\":\"fixed\",\"name\":\"f\",\"size\":1}]"); + Schema f = SchemaParser.parseSingle("{\"type\":\"fixed\",\"name\":\"f\",\"size\":1}"); GenericData data = ReflectData.get(); assertEquals(1, data.resolveUnion(s, new GenericData.Fixed(f))); } @Test - public void testUnionWithEnum() { - Schema s = new Schema.Parser().parse("[\"null\", {\"type\":\"enum\",\"name\":\"E\",\"namespace\":" + void unionWithEnum() { + Schema s = SchemaParser.parseSingle("[\"null\", {\"type\":\"enum\",\"name\":\"E\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"symbols\":[\"A\",\"B\"]}]"); GenericData data = ReflectData.get(); assertEquals(1, data.resolveUnion(s, E.A)); } @Test - public void testUnionWithBytes() { - Schema s = new Schema.Parser().parse("[\"null\", \"bytes\"]"); + void unionWithBytes() { + Schema s = SchemaParser.parseSingle("[\"null\", \"bytes\"]"); GenericData data = ReflectData.get(); assertEquals(1, data.resolveUnion(s, ByteBuffer.wrap(new byte[] { 1 }))); } @@ -190,24 +196,24 @@ public boolean equals(Object o) { } @Test - public void testMap() throws Exception { + void map() throws Exception { check(R1.class.getDeclaredField("mapField").getGenericType(), "{\"type\":\"map\",\"values\":\"string\"}"); } @Test - public void testArray() throws Exception { + void array() throws Exception { check(R1.class.getDeclaredField("arrayField").getGenericType(), "{\"type\":\"array\",\"items\":\"string\",\"java-class\":\"[Ljava.lang.String;\"}"); } @Test - public void testList() throws Exception { + void list() throws Exception { check(R1.class.getDeclaredField("listField").getGenericType(), "{\"type\":\"array\",\"items\":\"string\"" + ",\"java-class\":\"java.util.List\"}"); } @Test - public void testR1() throws Exception { + void r1() throws Exception { checkReadWrite(new R1()); } @@ -226,7 +232,7 @@ public boolean equals(Object o) { } @Test - public void testR2() throws Exception { + void r2() throws Exception { R2 r2 = new R2(); r2.arrayField = new String[] { "foo" }; r2.collectionField = new ArrayList<>(); @@ -248,7 +254,7 @@ public boolean equals(Object o) { } @Test - public void testR3() throws Exception { + void r3() throws Exception { R3 r3 = new R3(); r3.intArray = new int[] { 1 }; checkReadWrite(r3); @@ -275,7 +281,7 @@ public static class R5 extends R4 { } @Test - public void testR5() throws Exception { + void r5() throws Exception { R5 r5 = new R5(); r5.value = 1; r5.shorts = new short[] { 3, 255, 256, Short.MAX_VALUE, Short.MIN_VALUE }; @@ -324,7 +330,7 @@ public boolean equals(Object o) { } @Test - public void testR6() throws Exception { + void r6() throws Exception { R7 r7 = new R7(); r7.value = 1; checkReadWrite(r7, ReflectData.get().getSchema(R6.class)); @@ -352,7 +358,7 @@ public boolean equals(Object o) { } @Test - public void testR6_1() throws Exception { + void r6_1() throws Exception { R7 r7 = new R7(); r7.value = 1; checkReadWrite(r7, ReflectData.get().getSchema(R6.class)); @@ -375,7 +381,8 @@ public static interface P0 { } @Test - public void testP0() throws Exception { + @DisabledIfEnvironmentVariable(named = "WithinInvokerPlugin", matches = "true", disabledReason = "Doesn't work, no clue why") + void p0() throws Exception { Protocol p0 = ReflectData.get().getProtocol(P0.class); Protocol.Message message = p0.getMessages().get("foo"); // check response schema is union @@ -386,7 +393,11 @@ public void testP0() throws Exception { // check request schema is union Schema request = message.getRequest(); Field field = request.getField("s"); - assertNotNull("field 's' should not be null", field); + // FIXME: Figure out why this test fails under the invoker plugin and succeeds + // while normal testing + // [ERROR] TestReflect.p0:393 field 's' should not be null ==> expected: not + // + assertNotNull(field, "field 's' should not be null"); Schema param = field.schema(); assertEquals(Schema.Type.UNION, param.getType()); assertEquals(Schema.Type.NULL, param.getTypes().get(0).getType()); @@ -419,7 +430,7 @@ public boolean equals(Object o) { } @Test - public void testR10() throws Exception { + void r10() throws Exception { Schema r10Schema = ReflectData.get().getSchema(R10.class); assertEquals(Schema.Type.STRING, r10Schema.getType()); assertEquals(R10.class.getName(), r10Schema.getProp("java-class")); @@ -443,7 +454,7 @@ public boolean equals(Object o) { } @Test - public void testR11() throws Exception { + void r11() throws Exception { Schema r11Record = ReflectData.get().getSchema(R11.class); assertEquals(Schema.Type.RECORD, r11Record.getType()); Field r11Field = r11Record.getField("text"); @@ -466,7 +477,8 @@ public static interface P1 { } @Test - public void testP1() throws Exception { + @DisabledIfEnvironmentVariable(named = "WithinInvokerPlugin", matches = "true", disabledReason = "Doesn't work, no clue why") + void p1() throws Exception { Protocol p1 = ReflectData.get().getProtocol(P1.class); Protocol.Message message = p1.getMessages().get("foo"); // check response schema is union @@ -477,7 +489,11 @@ public void testP1() throws Exception { // check request schema is union Schema request = message.getRequest(); Field field = request.getField("s"); - assertNotNull("field 's' should not be null", field); + // FIXME: Figure out why this test fails under the invoker plugin and succeeds + // while normal testing + // [ERROR] TestReflect.p1:484 field 's' should not be null ==> expected: not + // + assertNotNull(field, "field 's' should not be null"); Schema param = field.schema(); assertEquals(Schema.Type.UNION, param.getType()); assertEquals(Schema.Type.NULL, param.getTypes().get(0).getType()); @@ -497,19 +513,19 @@ public static class R12 { // fields } @Test - public void testR12() throws Exception { + void r12() throws Exception { Schema s = ReflectData.get().getSchema(R12.class); assertEquals(Schema.Type.INT, s.getField("x").schema().getType()); - assertEquals(new Schema.Parser().parse("{\"type\":\"array\",\"items\":[\"null\",\"string\"]}"), + assertEquals(SchemaParser.parseSingle("{\"type\":\"array\",\"items\":[\"null\",\"string\"]}"), s.getField("strings").schema()); } @AvroSchema("\"null\"") // record - public class R13 { + public static class R13 { } @Test - public void testR13() throws Exception { + void r13() throws Exception { Schema s = ReflectData.get().getSchema(R13.class); assertEquals(Schema.Type.NULL, s.getType()); } @@ -520,7 +536,11 @@ public interface P4 { } @Test - public void testP4() throws Exception { + // FIXME: Figure out why this test fails under the invoker plugin and succeeds + // while normal testing + // [ERROR] TestReflect.p4:532 NullPointer + @DisabledIfEnvironmentVariable(named = "WithinInvokerPlugin", matches = "true", disabledReason = "Doesn't work, no clue why") + void p4() throws Exception { Protocol p = ReflectData.get().getProtocol(P4.class); Protocol.Message message = p.getMessages().get("foo"); assertEquals(Schema.Type.INT, message.getResponse().getType()); @@ -537,13 +557,41 @@ public static interface P2 { void error() throws E1; } + private static class NullableDefaultTest { + @Nullable + @AvroDefault("1") + int foo; + } + + @Test + public void testAvroNullableDefault() { + check(NullableDefaultTest.class, + "{\"type\":\"record\",\"name\":\"NullableDefaultTest\"," + + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + + "{\"name\":\"foo\",\"type\":[\"null\",\"int\"],\"default\":1}]}"); + } + + private static class UnionDefaultTest { + @Union({ Integer.class, String.class }) + @AvroDefault("1") + Object foo; + } + + @Test + public void testAvroUnionDefault() { + check(UnionDefaultTest.class, + "{\"type\":\"record\",\"name\":\"UnionDefaultTest\"," + + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + + "{\"name\":\"foo\",\"type\":[\"int\",\"string\"],\"default\":1}]}"); + } + @Test - public void testP2() throws Exception { + void p2() throws Exception { Schema e1 = ReflectData.get().getSchema(E1.class); assertEquals(Schema.Type.RECORD, e1.getType()); assertTrue(e1.isError()); Field message = e1.getField("detailMessage"); - assertNotNull("field 'detailMessage' should not be null", message); + assertNotNull(message, "field 'detailMessage' should not be null"); Schema messageSchema = message.schema(); assertEquals(Schema.Type.UNION, messageSchema.getType()); assertEquals(Schema.Type.NULL, messageSchema.getTypes().get(0).getType()); @@ -559,7 +607,7 @@ public void testP2() throws Exception { } @Test - public void testNoPackage() throws Exception { + void noPackage() throws Exception { Class noPackage = Class.forName("NoPackage"); Schema s = ReflectData.get().getSchema(noPackage); assertEquals(noPackage.getName(), ReflectData.getClassName(s)); @@ -570,6 +618,7 @@ void checkReadWrite(Object object) throws Exception { } void checkReadWrite(Object object, Schema s) throws Exception { + ReflectDatumWriter writer = new ReflectDatumWriter<>(s); ByteArrayOutputStream out = new ByteArrayOutputStream(); writer.write(object, factory.directBinaryEncoder(out, null)); @@ -584,16 +633,16 @@ void checkReadWrite(Object object, Schema s) throws Exception { Object val = ReflectData.get().getField(object, f.name(), f.pos()); ReflectData.get().setField(copy, f.name(), f.pos(), val); } - assertEquals("setField", object, copy); + assertEquals(object, copy, "setField"); } } public static enum E { A, B - }; + } @Test - public void testEnum() throws Exception { + void testEnum() throws Exception { check(E.class, "{\"type\":\"enum\",\"name\":\"E\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"symbols\":[\"A\",\"B\"]}"); } @@ -604,7 +653,7 @@ public static class R { } @Test - public void testRecord() throws Exception { + void record() throws Exception { check(R.class, "{\"type\":\"record\",\"name\":\"R\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"a\",\"type\":\"int\"}," + "{\"name\":\"b\",\"type\":\"long\"}]}"); @@ -616,7 +665,7 @@ public static class RAvroIgnore { } @Test - public void testAnnotationAvroIgnore() throws Exception { + void annotationAvroIgnore() throws Exception { check(RAvroIgnore.class, "{\"type\":\"record\",\"name\":\"RAvroIgnore\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[]}"); } @@ -628,7 +677,7 @@ public static class RAvroMeta { } @Test - public void testAnnotationAvroMeta() throws Exception { + void annotationAvroMeta() throws Exception { check(RAvroMeta.class, "{\"type\":\"record\",\"name\":\"RAvroMeta\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"a\",\"type\":\"int\",\"K\":\"V\"}]" @@ -644,7 +693,7 @@ public static class RAvroMultiMeta { } @Test - public void testAnnotationMultiAvroMeta() { + void annotationMultiAvroMeta() { check(RAvroMultiMeta.class, "{\"type\":\"record\",\"name\":\"RAvroMultiMeta\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" @@ -657,9 +706,11 @@ public static class RAvroDuplicateFieldMeta { int a; } - @Test(expected = AvroTypeException.class) - public void testAnnotationDuplicateFieldAvroMeta() { - ReflectData.get().getSchema(RAvroDuplicateFieldMeta.class); + @Test + void annotationDuplicateFieldAvroMeta() { + assertThrows(AvroTypeException.class, () -> { + ReflectData.get().getSchema(RAvroDuplicateFieldMeta.class); + }); } @AvroMeta(key = "K", value = "V") @@ -668,9 +719,11 @@ public static class RAvroDuplicateTypeMeta { int a; } - @Test(expected = AvroTypeException.class) - public void testAnnotationDuplicateTypeAvroMeta() { - ReflectData.get().getSchema(RAvroDuplicateTypeMeta.class); + @Test + void annotationDuplicateTypeAvroMeta() { + assertThrows(AvroTypeException.class, () -> { + ReflectData.get().getSchema(RAvroDuplicateTypeMeta.class); + }); } public static class RAvroName { @@ -679,7 +732,7 @@ public static class RAvroName { } @Test - public void testAnnotationAvroName() throws Exception { + void annotationAvroName() throws Exception { check(RAvroName.class, "{\"type\":\"record\",\"name\":\"RAvroName\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"b\",\"type\":\"int\"}]}"); } @@ -690,12 +743,14 @@ public static class RAvroNameCollide { int b; } - @Test(expected = Exception.class) - public void testAnnotationAvroNameCollide() throws Exception { - check(RAvroNameCollide.class, - "{\"type\":\"record\",\"name\":\"RAvroNameCollide\",\"namespace\":" - + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"b\",\"type\":\"int\"}," - + "{\"name\":\"b\",\"type\":\"int\"}]}"); + @Test + void annotationAvroNameCollide() throws Exception { + assertThrows(Exception.class, () -> { + check(RAvroNameCollide.class, + "{\"type\":\"record\",\"name\":\"RAvroNameCollide\",\"namespace\":" + + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"b\",\"type\":\"int\"}," + + "{\"name\":\"b\",\"type\":\"int\"}]}"); + }); } public static class RAvroStringableField { @@ -704,7 +759,7 @@ public static class RAvroStringableField { } @Test - public void testAnnotationAvroStringableFields() throws Exception { + void annotationAvroStringableFields() throws Exception { check(RAvroStringableField.class, "{\"type\":\"record\",\"name\":\"RAvroStringableField\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"a\",\"type\":\"string\"}]}"); } @@ -718,7 +773,7 @@ private void check(java.lang.reflect.Type type, String schemaJson) { } @Test - public void testRecordIO() throws IOException { + void recordIO() throws IOException { Schema schm = ReflectData.get().getSchema(SampleRecord.class); ReflectDatumWriter writer = new ReflectDatumWriter<>(schm); ByteArrayOutputStream out = new ByteArrayOutputStream(); @@ -792,7 +847,7 @@ public static class multipleAnnotationRecord { } @Test - public void testMultipleAnnotations() throws IOException { + void multipleAnnotations() throws IOException { Schema schm = ReflectData.get().getSchema(multipleAnnotationRecord.class); ReflectDatumWriter writer = new ReflectDatumWriter<>(schm); ByteArrayOutputStream out = new ByteArrayOutputStream(); @@ -813,21 +868,21 @@ public void testMultipleAnnotations() throws IOException { ReflectDatumReader reader = new ReflectDatumReader<>(schm); multipleAnnotationRecord decoded = reader.read(new multipleAnnotationRecord(), DecoderFactory.get().binaryDecoder(out.toByteArray(), null)); - assertTrue(decoded.i1 == null); - assertTrue(decoded.i2 == null); - assertTrue(decoded.i3 == null); - assertTrue(decoded.i4 == null); - assertTrue(decoded.i5 == 5); - assertTrue(decoded.i6 == 6); - assertTrue(decoded.i7.getTime() == 7); - assertTrue(decoded.i8 == 8); - assertTrue(decoded.i9.getTime() == 9); - assertTrue(decoded.i10.getTime() == 10); - assertTrue(decoded.i11.getTime() == 11); + assertNull(decoded.i1); + assertNull(decoded.i2); + assertNull(decoded.i3); + assertNull(decoded.i4); + assertEquals(decoded.i5, 5); + assertEquals(decoded.i6, 6); + assertEquals(decoded.i7.getTime(), 7); + assertEquals(decoded.i8, 8); + assertEquals(decoded.i9.getTime(), 9); + assertEquals(decoded.i10.getTime(), 10); + assertEquals(decoded.i11.getTime(), 11); } @Test - public void testAvroEncodeInducing() throws IOException { + void avroEncodeInducing() throws IOException { Schema schm = ReflectData.get().getSchema(AvroEncRecord.class); assertEquals(schm.toString(), "{\"type\":\"record\",\"name\":\"AvroEncRecord\",\"namespace" @@ -836,7 +891,7 @@ public void testAvroEncodeInducing() throws IOException { } @Test - public void testAvroEncodeIO() throws IOException { + void avroEncodeIO() throws IOException { Schema schm = ReflectData.get().getSchema(AvroEncRecord.class); ReflectDatumWriter writer = new ReflectDatumWriter<>(schm); ByteArrayOutputStream out = new ByteArrayOutputStream(); @@ -850,7 +905,7 @@ public void testAvroEncodeIO() throws IOException { } @Test - public void testRecordWithNullIO() throws IOException { + void recordWithNullIO() throws IOException { ReflectData reflectData = ReflectData.AllowNull.get(); Schema schm = reflectData.getSchema(AnotherSampleRecord.class); ReflectDatumWriter writer = new ReflectDatumWriter<>(schm); @@ -871,27 +926,6 @@ public void testRecordWithNullIO() throws IOException { assertEquals(b, decoded); } - @Test - public void testDisableUnsafe() throws Exception { - String saved = System.getProperty("avro.disable.unsafe"); - try { - System.setProperty("avro.disable.unsafe", "true"); - ReflectData.ACCESSOR_CACHE.remove(multipleAnnotationRecord.class); - ReflectData.ACCESSOR_CACHE.remove(AnotherSampleRecord.class); - ReflectionUtil.resetFieldAccess(); - testMultipleAnnotations(); - testRecordWithNullIO(); - } finally { - if (saved == null) - System.clearProperty("avro.disable.unsafe"); - else - System.setProperty("avro.disable.unsafe", saved); - ReflectData.ACCESSOR_CACHE.remove(multipleAnnotationRecord.class); - ReflectData.ACCESSOR_CACHE.remove(AnotherSampleRecord.class); - ReflectionUtil.resetFieldAccess(); - } - } - public static class SampleRecord { public int x = 1; private int y = 2; @@ -969,10 +1003,11 @@ public static interface C { } @Test - public void testForwardReference() { + void forwardReference() { ReflectData data = ReflectData.get(); Protocol reflected = data.getProtocol(C.class); - Protocol reparsed = Protocol.parse(reflected.toString()); + String ref = reflected.toString(); + Protocol reparsed = Protocol.parse(ref); assertEquals(reflected, reparsed); assert (reparsed.getTypes().contains(data.getSchema(A.class))); assert (reparsed.getTypes().contains(data.getSchema(B1.class))); @@ -986,18 +1021,20 @@ public static interface P3 { void m1(int x); } - @Test(expected = AvroTypeException.class) - public void testOverloadedMethod() { - ReflectData.get().getProtocol(P3.class); + @Test + void overloadedMethod() { + assertThrows(AvroTypeException.class, () -> { + ReflectData.get().getProtocol(P3.class); + }); } @Test - public void testNoPackageSchema() throws Exception { + void noPackageSchema() throws Exception { ReflectData.get().getSchema(Class.forName("NoPackage")); } @Test - public void testNoPackageProtocol() throws Exception { + void noPackageProtocol() throws Exception { ReflectData.get().getProtocol(Class.forName("NoPackage")); } @@ -1005,9 +1042,9 @@ public static class Y { int i; } - @Test /** Test nesting of reflect data within generic. */ - public void testReflectWithinGeneric() throws Exception { + @Test + void reflectWithinGeneric() throws Exception { ReflectData data = ReflectData.get(); // define a record with a field that's a specific Y Schema schema = Schema.createRecord("Foo", "", "x.y.z", false); @@ -1026,12 +1063,12 @@ public void testReflectWithinGeneric() throws Exception { } @Test - public void testPrimitiveArray() throws Exception { + void primitiveArray() throws Exception { testPrimitiveArrays(false); } @Test - public void testPrimitiveArrayBlocking() throws Exception { + void primitiveArrayBlocking() throws Exception { testPrimitiveArrays(true); } @@ -1080,15 +1117,15 @@ private Object randomFor(Class c, Random r) { /** Test union of null and an array. */ @Test - public void testNullArray() throws Exception { + void nullArray() throws Exception { String json = "[{\"type\":\"array\", \"items\": \"long\"}, \"null\"]"; - Schema schema = new Schema.Parser().parse(json); + Schema schema = SchemaParser.parseSingle(json); checkBinary(schema, null); } /** Test stringable classes. */ @Test - public void testStringables() throws Exception { + void stringables() throws Exception { checkStringable(java.math.BigDecimal.class, "10"); checkStringable(java.math.BigInteger.class, "20"); checkStringable(java.net.URI.class, "foo://bar:9000/baz"); @@ -1113,7 +1150,7 @@ public static class M1 { /** Test Map with stringable key classes. */ @Test - public void testStringableMapKeys() throws Exception { + void stringableMapKeys() throws Exception { M1 record = new M1(); record.integerKeyMap = new HashMap<>(1); record.integerKeyMap.put(10, "foo"); @@ -1137,7 +1174,7 @@ public static class NullableStringable { } @Test - public void testNullableStringableField() throws Exception { + void nullableStringableField() throws Exception { NullableStringable datum = new NullableStringable(); datum.number = java.math.BigDecimal.TEN; @@ -1174,7 +1211,24 @@ public static void checkBinary(Schema schema, Object datum) throws IOException { /** Test that the error message contains the name of the class. */ @Test - public void testReflectFieldError() throws Exception { + @EnabledForJreRange(min = JRE.JAVA_8, max = JRE.JAVA_11, disabledReason = "Java 11 announced: All illegal access operations will be denied in a future release") + // Java 11: + // - WARNING: An illegal reflective access operation has occurred + // - WARNING: Illegal reflective access by + // org.apache.avro.reflect.FieldAccessReflect$ReflectionBasedAccessor to field + // java.lang.String.coder + // - WARNING: Please consider reporting this to the maintainers of + // org.apache.avro.reflect.FieldAccessReflect$ReflectionBasedAccessor + // - WARNING: Use --illegal-access=warn to enable warnings of further illegal + // reflective access operations + // - WARNING: All illegal access operations will be denied in a future release + // Java 17: + // - [ERROR] org.apache.avro.reflect.TestReflect.reflectFieldError -- Time + // elapsed: 0.015 s <<< ERROR! + // - java.lang.reflect.InaccessibleObjectException: Unable to make field private + // final byte java.lang.String.coder accessible: module java.base does not + // "opens java.lang" to unnamed module @5a6d67c3 + void reflectFieldError() throws Exception { Object datum = ""; try { ReflectData.get().getField(datum, "notAFieldOfString", 0); @@ -1196,7 +1250,7 @@ private static class AliasC { } @Test - public void testAvroAliasOnClass() { + void avroAliasOnClass() { check(AliasA.class, "{\"type\":\"record\",\"name\":\"AliasA\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[],\"aliases\":[\"b.a\"]}"); check(AliasB.class, @@ -1212,7 +1266,7 @@ private static class MultipleAliasRecord { } @Test - public void testMultipleAliasAnnotationsOnClass() { + void multipleAliasAnnotationsOnClass() { check(MultipleAliasRecord.class, "{\"type\":\"record\",\"name\":\"MultipleAliasRecord\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[],\"aliases\":[\"space1.alias1\",\"space2.alias2\"]}"); @@ -1222,19 +1276,20 @@ private static class Z { } @Test - public void testDollarTerminatedNamespaceCompatibility() { + void dollarTerminatedNamespaceCompatibility() { ReflectData data = ReflectData.get(); - Schema s = new Schema.Parser().setValidate(false).parse( + Schema s = JsonSchemaParser.parseInternal( "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect$\",\"fields\":[]}"); - assertEquals(data.getSchema(data.getClass(s)).toString(), - "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[]}"); + assertEquals( + "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[]}", + data.getSchema(data.getClass(s)).toString()); } @Test - public void testDollarTerminatedNestedStaticClassNamespaceCompatibility() { + void dollarTerminatedNestedStaticClassNamespaceCompatibility() { ReflectData data = ReflectData.get(); // Older versions of Avro generated this namespace on nested records. - Schema s = new Schema.Parser().setValidate(false).parse( + Schema s = JsonSchemaParser.parseInternal( "{\"type\":\"record\",\"name\":\"AnotherSampleRecord\",\"namespace\":\"org.apache.avro.reflect.TestReflect$SampleRecord\",\"fields\":[]}"); assertThat(data.getSchema(data.getClass(s)).getFullName(), is("org.apache.avro.reflect.TestReflect.SampleRecord.AnotherSampleRecord")); @@ -1257,7 +1312,7 @@ private static class ClassWithAliasAndNamespaceOnField { } @Test - public void testAvroAliasOnField() { + void avroAliasOnField() { Schema expectedSchema = SchemaBuilder.record(ClassWithAliasOnField.class.getSimpleName()) .namespace("org.apache.avro.reflect.TestReflect").fields().name("primitiveField").aliases("aliasName") @@ -1266,19 +1321,35 @@ public void testAvroAliasOnField() { check(ClassWithAliasOnField.class, expectedSchema.toString()); } - @Test(expected = AvroRuntimeException.class) - public void namespaceDefinitionOnFieldAliasMustThrowException() { - ReflectData.get().getSchema(ClassWithAliasAndNamespaceOnField.class); + @Test + void namespaceDefinitionOnFieldAliasMustThrowException() { + assertThrows(AvroRuntimeException.class, () -> { + ReflectData.get().getSchema(ClassWithAliasAndNamespaceOnField.class); + }); } @Test public void testMultipleFieldAliases() { + Field field = new Field("primitiveField", Schema.create(Schema.Type.INT)); + field.addAlias("alias1"); + field.addAlias("alias2"); + Schema avroMultiMeta = Schema.createRecord("ClassWithMultipleAliasesOnField", null, + "org.apache.avro.reflect.TestReflect", false, Arrays.asList(field)); - Schema expectedSchema = SchemaBuilder.record(ClassWithMultipleAliasesOnField.class.getSimpleName()) - .namespace("org.apache.avro.reflect.TestReflect").fields().name("primitiveField").aliases("alias1", "alias2") - .type(Schema.create(org.apache.avro.Schema.Type.INT)).noDefault().endRecord(); + Schema schema = ReflectData.get().getSchema(ClassWithMultipleAliasesOnField.class); + assertEquals(avroMultiMeta, schema); + } + + private static class OptionalTest { + Optional foo; + } - check(ClassWithMultipleAliasesOnField.class, expectedSchema.toString()); + @Test + public void testOptional() { + check(OptionalTest.class, + "{\"type\":\"record\",\"name\":\"OptionalTest\"," + + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + + "{\"name\":\"foo\",\"type\":[\"null\",\"int\"],\"default\":null}]}"); } private static class DefaultTest { @@ -1287,7 +1358,7 @@ private static class DefaultTest { } @Test - public void testAvroDefault() { + void avroDefault() { check(DefaultTest.class, "{\"type\":\"record\",\"name\":\"DefaultTest\"," + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" @@ -1312,12 +1383,12 @@ public boolean equals(Object obj) { } @Test - public void testNullableByteArrayNotNullValue() throws Exception { + void nullableByteArrayNotNullValue() throws Exception { checkReadWrite(new NullableBytesTest("foo".getBytes(UTF_8))); } @Test - public void testNullableByteArrayNullValue() throws Exception { + void nullableByteArrayNullValue() throws Exception { checkReadWrite(new NullableBytesTest()); } @@ -1338,7 +1409,7 @@ private static class DocTest { } @Test - public void testAvroDoc() { + void avroDoc() { check(DocTest.class, "{\"type\":\"record\",\"name\":\"DocTest\",\"namespace\":\"org.apache.avro.reflect.TestReflect\"," + "\"doc\":\"DocTest class docs\"," + "\"fields\":[" @@ -1349,4 +1420,61 @@ public void testAvroDoc() { + "{\"name\":\"foo\",\"type\":\"int\",\"doc\":\"Some Documentation\"}" + "]}"); } + // test recursive record schema + public static class TreeNode { + public int value = 0; + @Nullable + public TreeNode left; + @Nullable + public TreeNode right; + + public TreeNode() { + } + + public TreeNode(int value) { + this.value = value; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof TreeNode)) + return false; + TreeNode that = (TreeNode) o; + if (value != that.value || !Objects.equals(left, that.left) || !Objects.equals(right, that.right)) + return false; + return true; + } + + @Override + public int hashCode() { + return Objects.hash(value, left, right); + } + + } + + @Test + void recursiveRecord() throws Exception { + Schema schema = ReflectData.get().getSchema(TreeNode.class); + assertEquals("TreeNode", schema.getName()); + assertEquals(3, schema.getFields().size()); + + // Verify that the left tree node contains the parent schema + Schema leftSchema = schema.getField("left").schema(); + assertEquals(Schema.Type.UNION, leftSchema.getType()); + assertEquals(2, leftSchema.getTypes().size()); + assertEquals(Schema.Type.NULL, leftSchema.getTypes().get(0).getType()); + assertEquals(schema, leftSchema.getTypes().get(1)); + + // Verify that the right tree node is the same union + Schema rightSchema = schema.getField("right").schema(); + assertEquals(leftSchema, rightSchema); + + // Test serialization with actual recursive data + TreeNode root = new TreeNode(100); + root.left = new TreeNode(90); + root.right = new TreeNode(101); + root.left.left = new TreeNode(-100); + + checkReadWrite(root); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectAllowNulls.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectAllowNulls.java index acbd4fb96b1..5c138857739 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectAllowNulls.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectAllowNulls.java @@ -17,11 +17,12 @@ */ package org.apache.avro.reflect; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.util.Arrays; import org.apache.avro.Schema; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestReflectAllowNulls { @@ -66,56 +67,56 @@ private static class AllowNullWithNullable { } @Test - public void testPrimitives() { + void primitives() { // AllowNull only makes fields nullable, so testing must use a base record Schema primitives = ReflectData.AllowNull.get().getSchema(Primitives.class); - Assert.assertEquals(requiredSchema(boolean.class), primitives.getField("aBoolean").schema()); - Assert.assertEquals(requiredSchema(byte.class), primitives.getField("aByte").schema()); - Assert.assertEquals(requiredSchema(short.class), primitives.getField("aShort").schema()); - Assert.assertEquals(requiredSchema(int.class), primitives.getField("anInt").schema()); - Assert.assertEquals(requiredSchema(long.class), primitives.getField("aLong").schema()); - Assert.assertEquals(requiredSchema(float.class), primitives.getField("aFloat").schema()); - Assert.assertEquals(requiredSchema(double.class), primitives.getField("aDouble").schema()); + assertEquals(requiredSchema(boolean.class), primitives.getField("aBoolean").schema()); + assertEquals(requiredSchema(byte.class), primitives.getField("aByte").schema()); + assertEquals(requiredSchema(short.class), primitives.getField("aShort").schema()); + assertEquals(requiredSchema(int.class), primitives.getField("anInt").schema()); + assertEquals(requiredSchema(long.class), primitives.getField("aLong").schema()); + assertEquals(requiredSchema(float.class), primitives.getField("aFloat").schema()); + assertEquals(requiredSchema(double.class), primitives.getField("aDouble").schema()); } @Test - public void testWrappers() { + void wrappers() { // AllowNull only makes fields nullable, so testing must use a base record Schema wrappers = ReflectData.AllowNull.get().getSchema(Wrappers.class); - Assert.assertEquals(nullableSchema(boolean.class), wrappers.getField("aBoolean").schema()); - Assert.assertEquals(nullableSchema(byte.class), wrappers.getField("aByte").schema()); - Assert.assertEquals(nullableSchema(short.class), wrappers.getField("aShort").schema()); - Assert.assertEquals(nullableSchema(int.class), wrappers.getField("anInt").schema()); - Assert.assertEquals(nullableSchema(long.class), wrappers.getField("aLong").schema()); - Assert.assertEquals(nullableSchema(float.class), wrappers.getField("aFloat").schema()); - Assert.assertEquals(nullableSchema(double.class), wrappers.getField("aDouble").schema()); - Assert.assertEquals(nullableSchema(Primitives.class), wrappers.getField("anObject").schema()); + assertEquals(nullableSchema(boolean.class), wrappers.getField("aBoolean").schema()); + assertEquals(nullableSchema(byte.class), wrappers.getField("aByte").schema()); + assertEquals(nullableSchema(short.class), wrappers.getField("aShort").schema()); + assertEquals(nullableSchema(int.class), wrappers.getField("anInt").schema()); + assertEquals(nullableSchema(long.class), wrappers.getField("aLong").schema()); + assertEquals(nullableSchema(float.class), wrappers.getField("aFloat").schema()); + assertEquals(nullableSchema(double.class), wrappers.getField("aDouble").schema()); + assertEquals(nullableSchema(Primitives.class), wrappers.getField("anObject").schema()); } @Test - public void testAllowNullWithNullableAnnotation() { + void allowNullWithNullableAnnotation() { Schema withNullable = ReflectData.AllowNull.get().getSchema(AllowNullWithNullable.class); - Assert.assertEquals("Should produce a nullable double", nullableSchema(double.class), - withNullable.getField("aDouble").schema()); + assertEquals(nullableSchema(double.class), withNullable.getField("aDouble").schema(), + "Should produce a nullable double"); Schema nullableDoubleOrLong = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.DOUBLE), Schema.create(Schema.Type.LONG))); - Assert.assertEquals("Should add null to a non-null union", nullableDoubleOrLong, - withNullable.getField("doubleOrLong").schema()); + assertEquals(nullableDoubleOrLong, withNullable.getField("doubleOrLong").schema(), + "Should add null to a non-null union"); - Assert.assertEquals("Should add null to a non-null union", nullableDoubleOrLong, - withNullable.getField("doubleOrLongOrNull1").schema()); + assertEquals(nullableDoubleOrLong, withNullable.getField("doubleOrLongOrNull1").schema(), + "Should add null to a non-null union"); Schema doubleOrLongOrNull = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.DOUBLE), Schema.create(Schema.Type.LONG), Schema.create(Schema.Type.NULL))); - Assert.assertEquals("Should add null to a non-null union", doubleOrLongOrNull, - withNullable.getField("doubleOrLongOrNull2").schema()); + assertEquals(doubleOrLongOrNull, withNullable.getField("doubleOrLongOrNull2").schema(), + "Should add null to a non-null union"); - Assert.assertEquals("Should add null to a non-null union", doubleOrLongOrNull, - withNullable.getField("doubleOrLongOrNull3").schema()); + assertEquals(doubleOrLongOrNull, withNullable.getField("doubleOrLongOrNull3").schema(), + "Should add null to a non-null union"); } private Schema requiredSchema(Class type) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectData.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectData.java index 59009883663..40d9ad6decb 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectData.java @@ -19,23 +19,32 @@ package org.apache.avro.reflect; import org.apache.avro.AvroTypeException; +import org.apache.avro.JsonSchemaParser; import org.apache.avro.Protocol; import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; import org.apache.avro.util.internal.JacksonUtils; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledForJreRange; +import org.junit.jupiter.api.condition.JRE; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; -import static org.hamcrest.Matchers.*; -import static org.junit.Assert.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.lessThan; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; public class TestReflectData { @Test @SuppressWarnings("unchecked") - public void testWeakSchemaCaching() throws Exception { + void weakSchemaCaching() throws Exception { int numSchemas = 1000000; for (int i = 0; i < numSchemas; i++) { // Create schema @@ -54,7 +63,7 @@ public void testWeakSchemaCaching() throws Exception { } @Test - public void testGenericProtocol() { + void genericProtocol() { Protocol protocol = ReflectData.get().getProtocol(FooBarProtocol.class); Schema recordSchema = ReflectData.get().getSchema(FooBarReflectiveRecord.class); @@ -107,7 +116,7 @@ static class Meta { } @Test - public void testCreateSchemaDefaultValue() { + void createSchemaDefaultValue() { Meta meta = new Meta(); validateSchema(meta); @@ -121,13 +130,12 @@ private void validateSchema(Meta meta) { final String schemaString = schema.toString(true); - Schema.Parser parser = new Schema.Parser(); - Schema cloneSchema = parser.parse(schemaString); + Schema cloneSchema = SchemaParser.parseSingle(schemaString); Map testCases = JacksonUtils.objectToMap(meta); for (Schema.Field field : cloneSchema.getFields()) { - assertEquals("Invalid field " + field.name(), field.defaultVal(), testCases.get(field.name())); + assertEquals(field.defaultVal(), testCases.get(field.name()), "Invalid field " + field.name()); } } @@ -135,13 +143,17 @@ public class Definition { public Map tokens; } - @Test(expected = AvroTypeException.class) - public void testNonStaticInnerClasses() { - ReflectData.get().getSchema(Definition.class); + @Test + // FIXME: Why does this test fail under JDK 21? + @EnabledForJreRange(min = JRE.JAVA_8, max = JRE.JAVA_17, disabledReason = "Doesn't work under JRE 21, no clue why") + void nonStaticInnerClasses() { + assertThrows(AvroTypeException.class, () -> { + ReflectData.get().getSchema(Definition.class); + }); } @Test - public void testStaticInnerClasses() { + void staticInnerClasses() { ReflectData.get().getSchema(Meta.class); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumReader.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumReader.java index e431f8f5599..ecd2cecb677 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumReader.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumReader.java @@ -18,18 +18,27 @@ package org.apache.avro.reflect; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Arrays; +import java.util.HashSet; +import java.util.HashMap; import java.util.List; +import java.util.Set; +import java.util.Map; +import java.util.Optional; +import org.apache.avro.Schema; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Test; +import org.apache.avro.util.ClassSecurityValidator; +import org.apache.avro.util.ClassSecurityValidator.ClassSecurityPredicate; +import org.junit.jupiter.api.Test; public class TestReflectDatumReader { @@ -43,8 +52,23 @@ private static byte[] serializeWithReflectDatumWriter(T toSerialize, Class new ReflectDatumReader<>(PojoWithArray.class)); + } finally { + ClassSecurityValidator.setGlobal(originalValidator); + } + } + + @Test + void read_PojoWithList() throws IOException { PojoWithList pojoWithList = new PojoWithList(); pojoWithList.setId(42); pojoWithList.setRelatedIds(Arrays.asList(1, 2, 3)); @@ -62,7 +86,7 @@ public void testRead_PojoWithList() throws IOException { } @Test - public void testRead_PojoWithArray() throws IOException { + void read_PojoWithArray() throws IOException { PojoWithArray pojoWithArray = new PojoWithArray(); pojoWithArray.setId(42); pojoWithArray.setRelatedIds(new int[] { 1, 2, 3 }); @@ -78,6 +102,113 @@ public void testRead_PojoWithArray() throws IOException { assertEquals(pojoWithArray, deserialized); } + @Test + public void testRead_PojoWithSet() throws IOException { + PojoWithSet pojoWithSet = new PojoWithSet(); + pojoWithSet.setId(42); + Set relatedIds = new HashSet<>(); + relatedIds.add(1); + relatedIds.add(2); + relatedIds.add(3); + pojoWithSet.setRelatedIds(relatedIds); + + byte[] serializedBytes = serializeWithReflectDatumWriter(pojoWithSet, PojoWithSet.class); + + Decoder decoder = DecoderFactory.get().binaryDecoder(serializedBytes, null); + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(PojoWithSet.class); + + PojoWithSet deserialized = new PojoWithSet(); + reflectDatumReader.read(deserialized, decoder); + + assertEquals(pojoWithSet, deserialized); + + } + + @Test + public void testRead_PojoWithMap() throws IOException { + PojoWithMap pojoWithMap = new PojoWithMap(); + pojoWithMap.setId(42); + Map relatedIds = new HashMap<>(); + relatedIds.put(1, 11); + relatedIds.put(2, 22); + relatedIds.put(3, 33); + pojoWithMap.setRelatedIds(relatedIds); + + byte[] serializedBytes = serializeWithReflectDatumWriter(pojoWithMap, PojoWithMap.class); + + Decoder decoder = DecoderFactory.get().binaryDecoder(serializedBytes, null); + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(PojoWithMap.class); + + PojoWithMap deserialized = new PojoWithMap(); + reflectDatumReader.read(deserialized, decoder); + + assertEquals(pojoWithMap, deserialized); + } + + @Test + public void testRead_PojoWithOptional() throws IOException { + PojoWithOptional pojoWithOptional = new PojoWithOptional(); + pojoWithOptional.setId(42); + pojoWithOptional.setRelatedId(Optional.of(13)); + + byte[] serializedBytes = serializeWithReflectDatumWriter(pojoWithOptional, PojoWithOptional.class); + + Decoder decoder = DecoderFactory.get().binaryDecoder(serializedBytes, null); + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(PojoWithOptional.class); + + PojoWithOptional deserialized = new PojoWithOptional(); + reflectDatumReader.read(deserialized, decoder); + + assertEquals(pojoWithOptional, deserialized); + } + + @Test + public void testRead_PojoWithEmptyOptional() throws IOException { + PojoWithOptional pojoWithOptional = new PojoWithOptional(); + pojoWithOptional.setId(42); + pojoWithOptional.setRelatedId(Optional.empty()); + + byte[] serializedBytes = serializeWithReflectDatumWriter(pojoWithOptional, PojoWithOptional.class); + + Decoder decoder = DecoderFactory.get().binaryDecoder(serializedBytes, null); + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(PojoWithOptional.class); + + PojoWithOptional deserialized = new PojoWithOptional(); + reflectDatumReader.read(deserialized, decoder); + + assertEquals(pojoWithOptional, deserialized); + } + + @Test + public void testRead_PojoWithNullableAnnotation() throws IOException { + PojoWithBasicTypeNullableAnnotationV1 v1Pojo = new PojoWithBasicTypeNullableAnnotationV1(); + int idValue = 1; + v1Pojo.setId(idValue); + byte[] serializedBytes = serializeWithReflectDatumWriter(v1Pojo, PojoWithBasicTypeNullableAnnotationV1.class); + Decoder decoder = DecoderFactory.get().binaryDecoder(serializedBytes, null); + + ReflectData reflectData = ReflectData.get(); + Schema schemaV1 = reflectData.getSchema(PojoWithBasicTypeNullableAnnotationV1.class); + Schema schemaV2 = reflectData.getSchema(PojoWithBasicTypeNullableAnnotationV2.class); + + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(schemaV1, + schemaV2); + + PojoWithBasicTypeNullableAnnotationV2 v2Pojo = new PojoWithBasicTypeNullableAnnotationV2(); + reflectDatumReader.read(v2Pojo, decoder); + + assertEquals(v1Pojo.id, v2Pojo.id); + assertEquals(v2Pojo.id, idValue); + assertEquals(v2Pojo.intId, FieldAccess.INT_DEFAULT_VALUE); + assertEquals(v2Pojo.floatId, FieldAccess.FLOAT_DEFAULT_VALUE); + assertEquals(v2Pojo.shortId, FieldAccess.SHORT_DEFAULT_VALUE); + assertEquals(v2Pojo.byteId, FieldAccess.BYTE_DEFAULT_VALUE); + assertEquals(v2Pojo.booleanId, FieldAccess.BOOLEAN_DEFAULT_VALUE); + assertEquals(v2Pojo.charId, FieldAccess.CHAR_DEFAULT_VALUE); + assertEquals(v2Pojo.longId, FieldAccess.LONG_DEFAULT_VALUE); + assertEquals(v2Pojo.doubleId, FieldAccess.DOUBLE_DEFAULT_VALUE); + } + public static class PojoWithList { private int id; private List relatedIds; @@ -167,6 +298,325 @@ public boolean equals(Object obj) { return false; return Arrays.equals(relatedIds, other.relatedIds); } + } + + public static class PojoWithSet { + private int id; + private Set relatedIds; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public Set getRelatedIds() { + return relatedIds; + } + public void setRelatedIds(Set relatedIds) { + this.relatedIds = relatedIds; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + id; + result = prime * result + ((relatedIds == null) ? 0 : relatedIds.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PojoWithSet other = (PojoWithSet) obj; + if (id != other.id) + return false; + if (relatedIds == null) { + return other.relatedIds == null; + } else + return relatedIds.equals(other.relatedIds); + } + } + + public static class PojoWithMap { + private int id; + private Map relatedIds; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public Map getRelatedIds() { + return relatedIds; + } + + public void setRelatedIds(Map relatedIds) { + this.relatedIds = relatedIds; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + id; + result = prime * result + ((relatedIds == null) ? 0 : relatedIds.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PojoWithMap other = (PojoWithMap) obj; + if (id != other.id) + return false; + if (relatedIds == null) { + return other.relatedIds == null; + } else + return relatedIds.equals(other.relatedIds); + } + } + + public static class PojoWithOptional { + private int id; + + private Optional relatedId; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public Optional getRelatedId() { + return relatedId; + } + + public void setRelatedId(Optional relatedId) { + this.relatedId = relatedId; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + id; + result = prime * result + ((relatedId == null) ? 0 : relatedId.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PojoWithOptional other = (PojoWithOptional) obj; + if (id != other.id) + return false; + if (relatedId == null) { + return other.relatedId == null; + } else + return relatedId.equals(other.relatedId); + } + } + + public static class PojoWithBasicTypeNullableAnnotationV1 { + + private int id; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + id; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PojoWithBasicTypeNullableAnnotationV1 other = (PojoWithBasicTypeNullableAnnotationV1) obj; + return id == other.id; + } + } + + public static class PojoWithBasicTypeNullableAnnotationV2 { + + private int id; + + @Nullable + private int intId; + + @Nullable + private float floatId; + + @Nullable + private short shortId; + + @Nullable + private byte byteId; + + @Nullable + private boolean booleanId; + + @Nullable + private char charId; + + @Nullable + private long longId; + + @Nullable + private double doubleId; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public int getIntId() { + return intId; + } + + public void setIntId(int intId) { + this.intId = intId; + } + + public float getFloatId() { + return floatId; + } + + public void setFloatId(float floatId) { + this.floatId = floatId; + } + + public short getShortId() { + return shortId; + } + + public void setShortId(short shortId) { + this.shortId = shortId; + } + + public byte getByteId() { + return byteId; + } + + public void setByteId(byte byteId) { + this.byteId = byteId; + } + + public boolean isBooleanId() { + return booleanId; + } + + public void setBooleanId(boolean booleanId) { + this.booleanId = booleanId; + } + + public char getCharId() { + return charId; + } + + public void setCharId(char charId) { + this.charId = charId; + } + + public long getLongId() { + return longId; + } + + public void setLongId(long longId) { + this.longId = longId; + } + + public double getDoubleId() { + return doubleId; + } + + public void setDoubleId(double doubleId) { + this.doubleId = doubleId; + } + + @Override + public int hashCode() { + final int prime = 31; + long temp; + int result = 1; + result = prime * result + id; + result = prime * result + intId; + result = prime * result + (floatId != 0.0f ? Float.floatToIntBits(floatId) : 0); + result = prime * result + (int) shortId; + result = prime * result + (int) byteId; + result = prime * result + (booleanId ? 1 : 0); + result = prime * result + (int) charId; + result = prime * result + (int) (longId ^ (longId >>> 32)); + temp = Double.doubleToLongBits(doubleId); + result = 31 * result + (int) (temp ^ (temp >>> 32)); + return result; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + PojoWithBasicTypeNullableAnnotationV2 that = (PojoWithBasicTypeNullableAnnotationV2) o; + if (id != that.id) + return false; + if (intId != that.intId) + return false; + if (Float.compare(that.floatId, floatId) != 0) + return false; + if (shortId != that.shortId) + return false; + if (byteId != that.byteId) + return false; + if (booleanId != that.booleanId) + return false; + if (charId != that.charId) + return false; + if (longId != that.longId) + return false; + return Double.compare(that.doubleId, doubleId) == 0; + } } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumWithAnonymousInstances.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumWithAnonymousInstances.java new file mode 100644 index 00000000000..a076593a56f --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumWithAnonymousInstances.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.reflect; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +/** + * https://issues.apache.org/jira/browse/AVRO-1851 + */ +public class TestReflectDatumWithAnonymousInstances { + private static Pojo pojo; + + @BeforeAll + public static void init() { + // 1. Anonymous instance + pojo = new Pojo() { + { + // 2. Anonymous instance + Person person = new Person() { + { + setAddress("Address"); + } + }; + setPerson(person); + // 3. Anonymous instance + setTestEnum(TestEnum.V); + } + }; + } + + // Properly serializes and deserializes a POJO with an enum instance + // (TestEnum#V) + @Test + void handleProperlyEnumInstances() throws IOException { + byte[] output = serialize(pojo); + Pojo deserializedPojo = deserialize(output); + assertEquals(pojo, deserializedPojo); + assertTrue(deserializedPojo.getTestEnum().is_V()); + } + + private Pojo deserialize(byte[] input) throws IOException { + ByteArrayInputStream inputStream = new ByteArrayInputStream(input); + Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null); + ReflectData reflectData = ReflectData.AllowNull.get(); + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(reflectData); + Schema schema = reflectData.getSchema(Pojo.class); + reflectDatumReader.setSchema(schema); + return reflectDatumReader.read(null, decoder); + } + + private byte[] serialize(Pojo input) throws IOException { + // Reflect data that supports nulls + ReflectData reflectData = ReflectData.AllowNull.get(); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null); + ReflectDatumWriter datumWriter = new ReflectDatumWriter<>(Pojo.class, reflectData); + datumWriter.write(input, encoder); + encoder.flush(); + return outputStream.toByteArray(); + } + + private static class Pojo { + private TestEnum testEnum; + private Person person; + + public TestEnum getTestEnum() { + return testEnum; + } + + public void setTestEnum(TestEnum testEnum) { + this.testEnum = testEnum; + } + + public Person getPerson() { + return person; + } + + public void setPerson(Person person) { + this.person = person; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + + if (o == null) + return false; + + Class thisClass = getClass(); + while (thisClass.isAnonymousClass()) { + thisClass = thisClass.getSuperclass(); + } + + Class oClass = o.getClass(); + while (oClass.isAnonymousClass()) { + oClass = oClass.getSuperclass(); + } + + if (thisClass != oClass) + return false; + + Pojo pojo = (Pojo) o; + + if (testEnum != pojo.testEnum) + return false; + return person != null ? person.equals(pojo.person) : pojo.person == null; + } + + @Override + public int hashCode() { + int result = testEnum != null ? testEnum.hashCode() : 0; + result = 31 * result + (person != null ? person.hashCode() : 0); + return result; + } + + @Override + public String toString() { + return "Pojo{" + "testEnum=" + testEnum + ", person=" + person + '}'; + } + } + + private static class Person { + private String name; + private String address; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getAddress() { + return address; + } + + public void setAddress(String address) { + this.address = address; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + + if (o == null) + return false; + + Class thisClass = getClass(); + while (thisClass.isAnonymousClass()) { + thisClass = thisClass.getSuperclass(); + } + + Class oClass = o.getClass(); + while (oClass.isAnonymousClass()) { + oClass = oClass.getSuperclass(); + } + + if (thisClass != oClass) + return false; + + Person person = (Person) o; + + if (name != null ? !name.equals(person.name) : person.name != null) + return false; + return address != null ? address.equals(person.address) : person.address == null; + } + + @Override + public int hashCode() { + int result = name != null ? name.hashCode() : 0; + result = 31 * result + (address != null ? address.hashCode() : 0); + return result; + } + + @Override + public String toString() { + return "Person{" + "name='" + name + '\'' + ", address='" + address + '\'' + '}'; + } + } + + enum TestEnum { + V { + @Override + public boolean is_V() { + return true; + } + }; + + public boolean is_V() { + return false; + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java index c23a2f7369b..d95f83f7abb 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java @@ -18,6 +18,8 @@ package org.apache.avro.reflect; +import static org.junit.jupiter.api.Assertions.*; + import java.io.File; import java.io.IOException; import java.math.BigDecimal; @@ -44,24 +46,22 @@ import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.specific.SpecificData; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; /** * Tests various logical types * string => UUID * fixed and bytes => Decimal * * record => Pair */ public class TestReflectLogicalTypes { - @Rule - public TemporaryFolder temp = new TemporaryFolder(); + @TempDir + public File temp; public static final ReflectData REFLECT = new ReflectData(); - @BeforeClass + @BeforeAll public static void addUUID() { REFLECT.addLogicalTypeConversion(new Conversions.UUIDConversion()); REFLECT.addLogicalTypeConversion(new Conversions.DecimalConversion()); @@ -69,7 +69,7 @@ public static void addUUID() { } @Test - public void testReflectedSchema() { + void reflectedSchema() { Schema expected = SchemaBuilder.record(RecordWithUUIDList.class.getName()).fields().name("uuids").type().array() .items().stringType().noDefault().endRecord(); expected.getField("uuids").schema().addProp(SpecificData.CLASS_PROP, List.class.getName()); @@ -77,7 +77,7 @@ public void testReflectedSchema() { Schema actual = REFLECT.getSchema(RecordWithUUIDList.class); - Assert.assertEquals("Should use the UUID logical type", expected, actual); + assertEquals(expected, actual, "Should use the UUID logical type"); } // this can be static because the schema only comes from reflection @@ -112,20 +112,20 @@ public int hashCode() { } @Test - public void testDecimalBytes() throws IOException { + void decimalBytes() throws IOException { Schema schema = REFLECT.getSchema(DecimalRecordBytes.class); - Assert.assertEquals("Should have the correct record name", "org.apache.avro.reflect.TestReflectLogicalTypes", - schema.getNamespace()); - Assert.assertEquals("Should have the correct record name", "DecimalRecordBytes", schema.getName()); - Assert.assertEquals("Should have the correct logical type", LogicalTypes.decimal(9, 2), - LogicalTypes.fromSchema(schema.getField("decimal").schema())); + assertEquals("org.apache.avro.reflect.TestReflectLogicalTypes", schema.getNamespace(), + "Should have the correct record name"); + assertEquals("DecimalRecordBytes", schema.getName(), "Should have the correct record name"); + assertEquals(LogicalTypes.decimal(9, 2), LogicalTypes.fromSchema(schema.getField("decimal").schema()), + "Should have the correct logical type"); DecimalRecordBytes record = new DecimalRecordBytes(); record.decimal = new BigDecimal("3.14"); File test = write(REFLECT, schema, record); - Assert.assertEquals("Should match the decimal after round trip", Collections.singletonList(record), - read(REFLECT.createDatumReader(schema), test)); + assertEquals(Collections.singletonList(record), read(REFLECT.createDatumReader(schema), test), + "Should match the decimal after round trip"); } // this can be static because the schema only comes from reflection @@ -160,20 +160,20 @@ public int hashCode() { } @Test - public void testDecimalFixed() throws IOException { + void decimalFixed() throws IOException { Schema schema = REFLECT.getSchema(DecimalRecordFixed.class); - Assert.assertEquals("Should have the correct record name", "org.apache.avro.reflect.TestReflectLogicalTypes", - schema.getNamespace()); - Assert.assertEquals("Should have the correct record name", "DecimalRecordFixed", schema.getName()); - Assert.assertEquals("Should have the correct logical type", LogicalTypes.decimal(9, 2), - LogicalTypes.fromSchema(schema.getField("decimal").schema())); + assertEquals("org.apache.avro.reflect.TestReflectLogicalTypes", schema.getNamespace(), + "Should have the correct record name"); + assertEquals("DecimalRecordFixed", schema.getName(), "Should have the correct record name"); + assertEquals(LogicalTypes.decimal(9, 2), LogicalTypes.fromSchema(schema.getField("decimal").schema()), + "Should have the correct logical type"); DecimalRecordFixed record = new DecimalRecordFixed(); record.decimal = new BigDecimal("3.14"); File test = write(REFLECT, schema, record); - Assert.assertEquals("Should match the decimal after round trip", Collections.singletonList(record), - read(REFLECT.createDatumReader(schema), test)); + assertEquals(Collections.singletonList(record), read(REFLECT.createDatumReader(schema), test), + "Should match the decimal after round trip"); } public static class Pair { @@ -230,7 +230,7 @@ public static class PairRecord { @Test @SuppressWarnings("unchecked") - public void testPairRecord() throws IOException { + void pairRecord() throws IOException { ReflectData model = new ReflectData(); model.addLogicalTypeConversion(new Conversion() { @Override @@ -258,11 +258,11 @@ public IndexedRecord toRecord(Pair value, Schema schema, LogicalType type) { }); LogicalTypes.register("pair", new LogicalTypes.LogicalTypeFactory() { - private final LogicalType PAIR = new LogicalType("pair"); + private final LogicalType pair = new LogicalType("pair"); @Override public LogicalType fromSchema(Schema schema) { - return PAIR; + return pair; } @Override @@ -272,11 +272,11 @@ public String getTypeName() { }); Schema schema = model.getSchema(PairRecord.class); - Assert.assertEquals("Should have the correct record name", "org.apache.avro.reflect.TestReflectLogicalTypes", - schema.getNamespace()); - Assert.assertEquals("Should have the correct record name", "PairRecord", schema.getName()); - Assert.assertEquals("Should have the correct logical type", "pair", - LogicalTypes.fromSchema(schema.getField("pair").schema()).getName()); + assertEquals("org.apache.avro.reflect.TestReflectLogicalTypes", schema.getNamespace(), + "Should have the correct record name"); + assertEquals("PairRecord", schema.getName(), "Should have the correct record name"); + assertEquals("pair", LogicalTypes.fromSchema(schema.getField("pair").schema()).getName(), + "Should have the correct logical type"); PairRecord record = new PairRecord(); record.pair = Pair.of(34L, 35L); @@ -286,12 +286,12 @@ public String getTypeName() { File test = write(model, schema, record); Pair actual = ((PairRecord) TestReflectLogicalTypes .read(model.createDatumReader(schema), test).get(0)).pair; - Assert.assertEquals("Data should match after serialization round-trip", 34L, (long) actual.first); - Assert.assertEquals("Data should match after serialization round-trip", 35L, (long) actual.second); + assertEquals(34L, (long) actual.first, "Data should match after serialization round-trip"); + assertEquals(35L, (long) actual.second, "Data should match after serialization round-trip"); } @Test - public void testReadUUID() throws IOException { + void readUUID() throws IOException { Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); @@ -310,19 +310,19 @@ public void testReadUUID() throws IOException { File test = write(ReflectData.get().getSchema(RecordWithStringUUID.class), r1, r2); - Assert.assertEquals("Should convert Strings to UUIDs", expected, read(REFLECT.createDatumReader(uuidSchema), test)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidSchema), test), "Should convert Strings to UUIDs"); // verify that the field's type overrides the logical type Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields().requiredString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(uuidStringSchema.getField("uuid").schema()); - Assert.assertEquals("Should not convert to UUID if accessor is String", Arrays.asList(r1, r2), - read(REFLECT.createDatumReader(uuidStringSchema), test)); + assertEquals(Arrays.asList(r1, r2), read(REFLECT.createDatumReader(uuidStringSchema), test), + "Should not convert to UUID if accessor is String"); } @Test - public void testWriteUUID() throws IOException { + void writeUUID() throws IOException { Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); @@ -345,16 +345,16 @@ public void testWriteUUID() throws IOException { Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields().requiredString("uuid") .endRecord(); - Assert.assertEquals("Should read uuid as String without UUID conversion", expected, - read(REFLECT.createDatumReader(uuidStringSchema), test)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidStringSchema), test), + "Should read uuid as String without UUID conversion"); LogicalTypes.uuid().addToSchema(uuidStringSchema.getField("uuid").schema()); - Assert.assertEquals("Should read uuid as String without UUID logical type", expected, - read(ReflectData.get().createDatumReader(uuidStringSchema), test)); + assertEquals(expected, read(ReflectData.get().createDatumReader(uuidStringSchema), test), + "Should read uuid as String without UUID logical type"); } @Test - public void testWriteNullableUUID() throws IOException { + void writeNullableUUID() throws IOException { Schema nullableUuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().optionalString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(nullableUuidSchema.getField("uuid").schema().getTypes().get(1)); @@ -377,12 +377,12 @@ public void testWriteNullableUUID() throws IOException { Schema nullableUuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields() .optionalString("uuid").endRecord(); - Assert.assertEquals("Should read uuid as String without UUID conversion", expected, - read(ReflectData.get().createDatumReader(nullableUuidStringSchema), test)); + assertEquals(expected, read(ReflectData.get().createDatumReader(nullableUuidStringSchema), test), + "Should read uuid as String without UUID conversion"); } @Test - public void testWriteNullableUUIDReadRequiredString() throws IOException { + void writeNullableUUIDReadRequiredString() throws IOException { Schema nullableUuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().optionalString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(nullableUuidSchema.getField("uuid").schema().getTypes().get(1)); @@ -405,68 +405,30 @@ public void testWriteNullableUUIDReadRequiredString() throws IOException { Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields().requiredString("uuid") .endRecord(); - Assert.assertEquals("Should read uuid as String without UUID conversion", expected, - read(REFLECT.createDatumReader(uuidStringSchema), test)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidStringSchema), test), + "Should read uuid as String without UUID conversion"); } @Test - public void testReadUUIDMissingLogicalTypeUnsafe() throws IOException { - String unsafeValue = System.getProperty("avro.disable.unsafe"); - try { - // only one FieldAccess can be set per JVM - System.clearProperty("avro.disable.unsafe"); - Assume.assumeTrue(ReflectionUtil.getFieldAccess() instanceof FieldAccessUnsafe); - - Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") - .endRecord(); - LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); - - UUID u1 = UUID.randomUUID(); - - RecordWithStringUUID r1 = new RecordWithStringUUID(); - r1.uuid = u1.toString(); - - File test = write(ReflectData.get().getSchema(RecordWithStringUUID.class), r1); - - RecordWithUUID datum = (RecordWithUUID) read(ReflectData.get().createDatumReader(uuidSchema), test).get(0); - Object uuid = datum.uuid; - Assert.assertTrue("UUID should be a String (unsafe)", uuid instanceof String); - } finally { - if (unsafeValue != null) { - System.setProperty("avro.disable.unsafe", unsafeValue); - } - } - } + void readUUIDMissingLogicalTypeReflect() throws IOException { + Assumptions.assumeTrue(ReflectionUtil.getFieldAccess() instanceof FieldAccessReflect); - @Test(expected = IllegalArgumentException.class) - public void testReadUUIDMissingLogicalTypeReflect() throws IOException { - String unsafeValue = System.getProperty("avro.disable.unsafe"); - try { - // only one FieldAccess can be set per JVM - System.setProperty("avro.disable.unsafe", "true"); - Assume.assumeTrue(ReflectionUtil.getFieldAccess() instanceof FieldAccessReflect); - - Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") - .endRecord(); - LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); - - UUID u1 = UUID.randomUUID(); + Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") + .endRecord(); + LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); - RecordWithStringUUID r1 = new RecordWithStringUUID(); - r1.uuid = u1.toString(); + UUID u1 = UUID.randomUUID(); - File test = write(ReflectData.get().getSchema(RecordWithStringUUID.class), r1); + RecordWithStringUUID r1 = new RecordWithStringUUID(); + r1.uuid = u1.toString(); - read(ReflectData.get().createDatumReader(uuidSchema), test).get(0); - } finally { - if (unsafeValue != null) { - System.setProperty("avro.disable.unsafe", unsafeValue); - } - } + File test = write(ReflectData.get().getSchema(RecordWithStringUUID.class), r1); + RecordWithUUID result = (RecordWithUUID) read(ReflectData.get().createDatumReader(uuidSchema), test).get(0); + assertEquals(u1, result.uuid); } - @Test(expected = DataFileWriter.AppendWriteException.class) - public void testWriteUUIDMissingLogicalType() throws IOException { + @Test + void writeUUIDMissingLogicalType() throws IOException { Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); @@ -488,11 +450,14 @@ public void testWriteUUIDMissingLogicalType() throws IOException { // this fails with an AppendWriteException wrapping ClassCastException // because the UUID isn't converted to a CharSequence expected internally - read(ReflectData.get().createDatumReader(uuidStringSchema), test); + List items = (List) read( + ReflectData.get().createDatumReader(uuidStringSchema), test); + assertEquals(r1.uuid.toString(), items.get(0).uuid); + assertEquals(r2.uuid.toString(), items.get(1).uuid); } @Test - public void testReadUUIDGenericRecord() throws IOException { + void readUUIDGenericRecord() throws IOException { Schema uuidSchema = SchemaBuilder.record("RecordWithUUID").fields().requiredString("uuid").endRecord(); LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); @@ -511,19 +476,19 @@ public void testReadUUIDGenericRecord() throws IOException { File test = write(ReflectData.get().getSchema(RecordWithStringUUID.class), r1, r2); - Assert.assertEquals("Should convert Strings to UUIDs", expected, read(REFLECT.createDatumReader(uuidSchema), test)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidSchema), test), "Should convert Strings to UUIDs"); // verify that the field's type overrides the logical type Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields().requiredString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); - Assert.assertEquals("Should not convert to UUID if accessor is String", Arrays.asList(r1, r2), - read(REFLECT.createDatumReader(uuidStringSchema), test)); + assertEquals(Arrays.asList(r1, r2), read(REFLECT.createDatumReader(uuidStringSchema), test), + "Should not convert to UUID if accessor is String"); } @Test - public void testReadUUIDArray() throws IOException { + void readUUIDArray() throws IOException { Schema uuidArraySchema = SchemaBuilder.record(RecordWithUUIDArray.class.getName()).fields().name("uuids").type() .array().items().stringType().noDefault().endRecord(); LogicalTypes.uuid().addToSchema(uuidArraySchema.getField("uuids").schema().getElementType()); @@ -539,12 +504,12 @@ public void testReadUUIDArray() throws IOException { File test = write(uuidArraySchema, r); - Assert.assertEquals("Should convert Strings to UUIDs", expected, - read(REFLECT.createDatumReader(uuidArraySchema), test).get(0)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidArraySchema), test).get(0), + "Should convert Strings to UUIDs"); } @Test - public void testWriteUUIDArray() throws IOException { + void writeUUIDArray() throws IOException { Schema uuidArraySchema = SchemaBuilder.record(RecordWithUUIDArray.class.getName()).fields().name("uuids").type() .array().items().stringType().noDefault().endRecord(); LogicalTypes.uuid().addToSchema(uuidArraySchema.getField("uuids").schema().getElementType()); @@ -567,12 +532,12 @@ public void testWriteUUIDArray() throws IOException { File test = write(REFLECT, uuidArraySchema, r); - Assert.assertEquals("Should read UUIDs as Strings", expected, - read(ReflectData.get().createDatumReader(stringArraySchema), test).get(0)); + assertEquals(expected, read(ReflectData.get().createDatumReader(stringArraySchema), test).get(0), + "Should read UUIDs as Strings"); } @Test - public void testReadUUIDList() throws IOException { + void readUUIDList() throws IOException { Schema uuidListSchema = SchemaBuilder.record(RecordWithUUIDList.class.getName()).fields().name("uuids").type() .array().items().stringType().noDefault().endRecord(); uuidListSchema.getField("uuids").schema().addProp(SpecificData.CLASS_PROP, List.class.getName()); @@ -589,12 +554,12 @@ public void testReadUUIDList() throws IOException { File test = write(uuidListSchema, r); - Assert.assertEquals("Should convert Strings to UUIDs", expected, - read(REFLECT.createDatumReader(uuidListSchema), test).get(0)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidListSchema), test).get(0), + "Should convert Strings to UUIDs"); } @Test - public void testWriteUUIDList() throws IOException { + void writeUUIDList() throws IOException { Schema uuidListSchema = SchemaBuilder.record(RecordWithUUIDList.class.getName()).fields().name("uuids").type() .array().items().stringType().noDefault().endRecord(); uuidListSchema.getField("uuids").schema().addProp(SpecificData.CLASS_PROP, List.class.getName()); @@ -615,20 +580,20 @@ public void testWriteUUIDList() throws IOException { File test = write(REFLECT, uuidListSchema, r); - Assert.assertEquals("Should read UUIDs as Strings", expected, - read(REFLECT.createDatumReader(stringArraySchema), test).get(0)); + assertEquals(expected, read(REFLECT.createDatumReader(stringArraySchema), test).get(0), + "Should read UUIDs as Strings"); } @Test - public void testReflectedSchemaLocalDateTime() { + void reflectedSchemaLocalDateTime() { Schema actual = REFLECT.getSchema(RecordWithTimestamps.class); - Assert.assertEquals("Should have the correct record name", "org.apache.avro.reflect", actual.getNamespace()); - Assert.assertEquals("Should have the correct record name", "RecordWithTimestamps", actual.getName()); - Assert.assertEquals("Should have the correct physical type", Schema.Type.LONG, - actual.getField("localDateTime").schema().getType()); - Assert.assertEquals("Should have the correct logical type", LogicalTypes.localTimestampMillis(), - LogicalTypes.fromSchema(actual.getField("localDateTime").schema())); + assertEquals("org.apache.avro.reflect", actual.getNamespace(), "Should have the correct record name"); + assertEquals("RecordWithTimestamps", actual.getName(), "Should have the correct record name"); + assertEquals(Schema.Type.LONG, actual.getField("localDateTime").schema().getType(), + "Should have the correct physical type"); + assertEquals(LogicalTypes.localTimestampMillis(), + LogicalTypes.fromSchema(actual.getField("localDateTime").schema()), "Should have the correct logical type"); } private static List read(DatumReader reader, File file) throws IOException { @@ -649,7 +614,7 @@ private File write(Schema schema, D... data) throws IOException { @SuppressWarnings("unchecked") private File write(GenericData model, Schema schema, D... data) throws IOException { - File file = temp.newFile(); + File file = File.createTempFile("junit", null, temp); DatumWriter writer = model.createDatumWriter(schema); try (DataFileWriter fileWriter = new DataFileWriter<>(writer)) { @@ -764,6 +729,6 @@ public boolean equals(Object obj) { return false; } RecordWithTimestamps that = (RecordWithTimestamps) obj; - return Objects.equals(that.localDateTime, that.localDateTime); + return Objects.equals(localDateTime, that.localDateTime); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectionUtil.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectionUtil.java deleted file mode 100644 index 515f9f345eb..00000000000 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectionUtil.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.avro.reflect; - -import java.io.IOException; -import java.io.InputStream; - -import org.junit.Test; - -public class TestReflectionUtil { - - @Test - public void testUnsafeUtil() { - new Tester().checkUnsafe(); - } - - @Test - public void testUnsafeWhenNotExists() throws Exception { - ClassLoader cl = new NoUnsafe(); - Class testerClass = cl.loadClass(Tester.class.getName()); - testerClass.getDeclaredMethod("checkUnsafe").invoke(testerClass.getDeclaredConstructor().newInstance()); - } - - public static final class Tester { - public Tester() { - } - - public void checkUnsafe() { - ReflectionUtil.getFieldAccess(); - } - - } - - private static final class NoUnsafe extends ClassLoader { - private ClassLoader parent = TestReflectionUtil.class.getClassLoader(); - - @Override - public java.lang.Class loadClass(String name) throws ClassNotFoundException { - Class clazz = findLoadedClass(name); - if (clazz != null) { - return clazz; - } - if ("sun.misc.Unsafe".equals(name)) { - throw new ClassNotFoundException(name); - } - if (!name.startsWith("org.apache.avro.")) { - return parent.loadClass(name); - } - - InputStream data = parent.getResourceAsStream(name.replace('.', '/') + ".class"); - byte[] buf = new byte[10240]; // big enough, too lazy to loop - int size; - try { - size = data.read(buf); - } catch (IOException e) { - throw new ClassNotFoundException(); - } - clazz = defineClass(name, buf, 0, size); - resolveClass(clazz); - return clazz; - } - - } -} diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithLogicalTypes.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithLogicalTypes.java index b7a89db6e59..65eb4ebf6b1 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithLogicalTypes.java @@ -1,10 +1,23 @@ -/** - * Autogenerated by Avro +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * DO NOT EDIT DIRECTLY + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.apache.avro.specific; +import org.apache.avro.Conversions; import org.apache.avro.data.TimeConversions; import org.apache.avro.message.BinaryMessageDecoder; import org.apache.avro.message.BinaryMessageEncoder; @@ -15,8 +28,8 @@ public class TestRecordWithLogicalTypes extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { private static final long serialVersionUID = 3313339903648295220L; - public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse( - "{\"type\":\"record\",\"name\":\"TestRecordWithLogicalTypes\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[{\"name\":\"b\",\"type\":\"boolean\"},{\"name\":\"i32\",\"type\":\"int\"},{\"name\":\"i64\",\"type\":\"long\"},{\"name\":\"f32\",\"type\":\"float\"},{\"name\":\"f64\",\"type\":\"double\"},{\"name\":\"s\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"d\",\"type\":{\"type\":\"int\",\"logicalType\":\"date\"}},{\"name\":\"t\",\"type\":{\"type\":\"int\",\"logicalType\":\"time-millis\"}},{\"name\":\"ts\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}},{\"name\":\"dec\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":9,\"scale\":2}}]}"); + public static final org.apache.avro.Schema SCHEMA$ = org.apache.avro.JsonSchemaParser.parseInternal( + "{\"type\":\"record\",\"name\":\"TestRecordWithLogicalTypes\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[{\"name\":\"b\",\"type\":\"boolean\"},{\"name\":\"i32\",\"type\":\"int\"},{\"name\":\"i64\",\"type\":\"long\"},{\"name\":\"f32\",\"type\":\"float\"},{\"name\":\"f64\",\"type\":\"double\"},{\"name\":\"s\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"d\",\"type\":{\"type\":\"int\",\"logicalType\":\"date\"}},{\"name\":\"t\",\"type\":{\"type\":\"int\",\"logicalType\":\"time-millis\"}},{\"name\":\"ts\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}},{\"name\":\"dec\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":9,\"scale\":2}},{\"name\":\"bd\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"big-decimal\"}}]}"); public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; @@ -77,6 +90,8 @@ public static TestRecordWithLogicalTypes fromByteBuffer(java.nio.ByteBuffer b) t public java.time.Instant ts; @Deprecated public java.math.BigDecimal dec; + @Deprecated + public java.math.BigDecimal bd; /** * Default constructor. Note that this does not initialize fields to their @@ -99,10 +114,11 @@ public TestRecordWithLogicalTypes() { * @param t The new value for t * @param ts The new value for ts * @param dec The new value for dec + * @param bd The new value for bd */ public TestRecordWithLogicalTypes(java.lang.Boolean b, java.lang.Integer i32, java.lang.Long i64, java.lang.Float f32, java.lang.Double f64, java.lang.CharSequence s, java.time.LocalDate d, java.time.LocalTime t, - java.time.Instant ts, java.math.BigDecimal dec) { + java.time.Instant ts, java.math.BigDecimal dec, java.math.BigDecimal bd) { this.b = b; this.i32 = i32; this.i64 = i64; @@ -113,6 +129,7 @@ public TestRecordWithLogicalTypes(java.lang.Boolean b, java.lang.Integer i32, ja this.t = t; this.ts = ts; this.dec = dec; + this.bd = bd; } @Override @@ -144,18 +161,24 @@ public java.lang.Object get(int field$) { return ts; case 9: return dec; + case 10: + return bd; default: - throw new org.apache.avro.AvroRuntimeException("Bad index"); + throw new org.apache.avro.AvroRuntimeException("Bad index " + field$); } } protected static final org.apache.avro.Conversions.DecimalConversion DECIMAL_CONVERSION = new org.apache.avro.Conversions.DecimalConversion(); + + protected static final Conversions.BigDecimalConversion BIG_DECIMAL_CONVERSION = new org.apache.avro.Conversions.BigDecimalConversion(); + protected static final TimeConversions.DateConversion DATE_CONVERSION = new TimeConversions.DateConversion(); protected static final TimeConversions.TimeMillisConversion TIME_CONVERSION = new TimeConversions.TimeMillisConversion(); protected static final TimeConversions.TimestampMillisConversion TIMESTAMP_CONVERSION = new TimeConversions.TimestampMillisConversion(); private static final org.apache.avro.Conversion[] conversions = new org.apache.avro.Conversion[] { null, null, - null, null, null, null, DATE_CONVERSION, TIME_CONVERSION, TIMESTAMP_CONVERSION, DECIMAL_CONVERSION, null }; + null, null, null, null, DATE_CONVERSION, TIME_CONVERSION, TIMESTAMP_CONVERSION, DECIMAL_CONVERSION, + BIG_DECIMAL_CONVERSION }; @Override public org.apache.avro.Conversion getConversion(int field) { @@ -197,6 +220,9 @@ public void put(int field$, java.lang.Object value$) { case 9: dec = (java.math.BigDecimal) value$; break; + case 10: + bd = (java.math.BigDecimal) value$; + break; default: throw new org.apache.avro.AvroRuntimeException("Bad index"); } @@ -438,6 +464,8 @@ public static class Builder extends org.apache.avro.specific.SpecificRecordBuild private java.time.Instant ts; private java.math.BigDecimal dec; + private java.math.BigDecimal bd; + /** Creates a new Builder */ private Builder() { super(SCHEMA$); @@ -490,6 +518,10 @@ private Builder(TestRecordWithLogicalTypes.Builder other) { this.dec = data().deepCopy(fields()[9].schema(), other.dec); fieldSetFlags()[9] = other.fieldSetFlags()[9]; } + if (isValidValue(fields()[10], other.bd)) { + this.bd = data().deepCopy(fields()[10].schema(), other.bd); + fieldSetFlags()[10] = other.fieldSetFlags()[10]; + } } /** @@ -539,6 +571,10 @@ private Builder(TestRecordWithLogicalTypes other) { this.dec = data().deepCopy(fields()[9].schema(), other.dec); fieldSetFlags()[9] = true; } + if (isValidValue(fields()[10], other.bd)) { + this.bd = data().deepCopy(fields()[10].schema(), other.bd); + fieldSetFlags()[10] = true; + } } /** @@ -968,6 +1004,7 @@ public TestRecordWithLogicalTypes build() { record.t = fieldSetFlags()[7] ? this.t : (java.time.LocalTime) defaultValue(fields()[7]); record.ts = fieldSetFlags()[8] ? this.ts : (java.time.Instant) defaultValue(fields()[8]); record.dec = fieldSetFlags()[9] ? this.dec : (java.math.BigDecimal) defaultValue(fields()[9]); + record.bd = fieldSetFlags()[10] ? this.dec : (java.math.BigDecimal) defaultValue(fields()[10]); return record; } catch (java.lang.Exception e) { throw new org.apache.avro.AvroRuntimeException(e); diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithMapsAndArrays.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithMapsAndArrays.java new file mode 100644 index 00000000000..1ffe36b79d1 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithMapsAndArrays.java @@ -0,0 +1,875 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.specific; + +import org.apache.avro.generic.GenericArray; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.util.Utf8; +import org.apache.avro.message.BinaryMessageEncoder; +import org.apache.avro.message.BinaryMessageDecoder; +import org.apache.avro.message.SchemaStore; + +@AvroGenerated +public class TestRecordWithMapsAndArrays extends SpecificRecordBase implements SpecificRecord { + private static final long serialVersionUID = -3823801533006425147L; + + public static final org.apache.avro.Schema SCHEMA$ = org.apache.avro.JsonSchemaParser.parseInternal( + "{\"type\":\"record\",\"name\":\"TestRecordWithMapsAndArrays\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[{\"name\":\"arr\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"default\":[]}},{\"name\":\"map\",\"type\":{\"type\":\"map\",\"values\":\"long\",\"avro.java.string\":\"String\",\"default\":{}}},{\"name\":\"nested_arr\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"array\",\"items\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"default\":[]},\"default\":[]}},{\"name\":\"nested_map\",\"type\":{\"type\":\"map\",\"values\":{\"type\":\"map\",\"values\":\"long\",\"avro.java.string\":\"String\",\"default\":{}},\"avro.java.string\":\"String\",\"default\":{}}}]}"); + + public static org.apache.avro.Schema getClassSchema() { + return SCHEMA$; + } + + private static final SpecificData MODEL$ = new SpecificData(); + + private static final BinaryMessageEncoder ENCODER = new BinaryMessageEncoder<>(MODEL$, + SCHEMA$); + + private static final BinaryMessageDecoder DECODER = new BinaryMessageDecoder<>(MODEL$, + SCHEMA$); + + /** + * Return the BinaryMessageEncoder instance used by this class. + * + * @return the message encoder used by this class + */ + public static BinaryMessageEncoder getEncoder() { + return ENCODER; + } + + /** + * Return the BinaryMessageDecoder instance used by this class. + * + * @return the message decoder used by this class + */ + public static BinaryMessageDecoder getDecoder() { + return DECODER; + } + + /** + * Create a new BinaryMessageDecoder instance for this class that uses the + * specified {@link SchemaStore}. + * + * @param resolver a {@link SchemaStore} used to find schemas by fingerprint + * @return a BinaryMessageDecoder instance for this class backed by the given + * SchemaStore + */ + public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { + return new BinaryMessageDecoder<>(MODEL$, SCHEMA$, resolver); + } + + /** + * Serializes this TestRecordWithMapsAndArrays to a ByteBuffer. + * + * @return a buffer holding the serialized data for this instance + * @throws java.io.IOException if this instance could not be serialized + */ + public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { + return ENCODER.encode(this); + } + + /** + * Deserializes a TestRecordWithMapsAndArrays from a ByteBuffer. + * + * @param b a byte buffer holding serialized data for an instance of this class + * @return a TestRecordWithMapsAndArrays instance decoded from the given buffer + * @throws java.io.IOException if the given bytes could not be deserialized into + * an instance of this class + */ + public static TestRecordWithMapsAndArrays fromByteBuffer(java.nio.ByteBuffer b) throws java.io.IOException { + return DECODER.decode(b); + } + + private java.util.List arr; + private java.util.Map map; + private java.util.List> nested_arr; + private java.util.Map> nested_map; + + /** + * Default constructor. Note that this does not initialize fields to their + * default values from the schema. If that is desired then one should use + * newBuilder(). + */ + public TestRecordWithMapsAndArrays() { + } + + /** + * All-args constructor. + * + * @param arr The new value for arr + * @param map The new value for map + * @param nested_arr The new value for nested_arr + * @param nested_map The new value for nested_map + */ + public TestRecordWithMapsAndArrays(java.util.List arr, java.util.Map map, + java.util.List> nested_arr, + java.util.Map> nested_map) { + this.arr = arr; + this.map = map; + this.nested_arr = nested_arr; + this.nested_map = nested_map; + } + + @Override + public SpecificData getSpecificData() { + return MODEL$; + } + + @Override + public org.apache.avro.Schema getSchema() { + return SCHEMA$; + } + + // Used by DatumWriter. Applications should not call. + @Override + public Object get(int field$) { + switch (field$) { + case 0: + return arr; + case 1: + return map; + case 2: + return nested_arr; + case 3: + return nested_map; + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + // Used by DatumReader. Applications should not call. + @Override + @SuppressWarnings(value = "unchecked") + public void put(int field$, Object value$) { + switch (field$) { + case 0: + arr = (java.util.List) value$; + break; + case 1: + map = (java.util.Map) value$; + break; + case 2: + nested_arr = (java.util.List>) value$; + break; + case 3: + nested_map = (java.util.Map>) value$; + break; + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + /** + * Gets the value of the 'arr' field. + * + * @return The value of the 'arr' field. + */ + public java.util.List getArr() { + return arr; + } + + /** + * Sets the value of the 'arr' field. + * + * @param value the value to set. + */ + public void setArr(java.util.List value) { + this.arr = value; + } + + /** + * Gets the value of the 'map' field. + * + * @return The value of the 'map' field. + */ + public java.util.Map getMap() { + return map; + } + + /** + * Sets the value of the 'map' field. + * + * @param value the value to set. + */ + public void setMap(java.util.Map value) { + this.map = value; + } + + /** + * Gets the value of the 'nested_arr' field. + * + * @return The value of the 'nested_arr' field. + */ + public java.util.List> getNestedArr() { + return nested_arr; + } + + /** + * Sets the value of the 'nested_arr' field. + * + * @param value the value to set. + */ + public void setNestedArr(java.util.List> value) { + this.nested_arr = value; + } + + /** + * Gets the value of the 'nested_map' field. + * + * @return The value of the 'nested_map' field. + */ + public java.util.Map> getNestedMap() { + return nested_map; + } + + /** + * Sets the value of the 'nested_map' field. + * + * @param value the value to set. + */ + public void setNestedMap(java.util.Map> value) { + this.nested_map = value; + } + + /** + * Creates a new TestRecordWithMapsAndArrays RecordBuilder. + * + * @return A new TestRecordWithMapsAndArrays RecordBuilder + */ + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Creates a new TestRecordWithMapsAndArrays RecordBuilder by copying an + * existing Builder. + * + * @param other The existing builder to copy. + * @return A new TestRecordWithMapsAndArrays RecordBuilder + */ + public static Builder newBuilder(Builder other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * Creates a new TestRecordWithMapsAndArrays RecordBuilder by copying an + * existing TestRecordWithMapsAndArrays instance. + * + * @param other The existing instance to copy. + * @return A new TestRecordWithMapsAndArrays RecordBuilder + */ + public static Builder newBuilder(TestRecordWithMapsAndArrays other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * RecordBuilder for TestRecordWithMapsAndArrays instances. + */ + @AvroGenerated + public static class Builder extends SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + private java.util.List arr; + private java.util.Map map; + private java.util.List> nested_arr; + private java.util.Map> nested_map; + + /** Creates a new Builder */ + private Builder() { + super(SCHEMA$, MODEL$); + } + + /** + * Creates a Builder by copying an existing Builder. + * + * @param other The existing Builder to copy. + */ + private Builder(Builder other) { + super(other); + if (isValidValue(fields()[0], other.arr)) { + this.arr = data().deepCopy(fields()[0].schema(), other.arr); + fieldSetFlags()[0] = other.fieldSetFlags()[0]; + } + if (isValidValue(fields()[1], other.map)) { + this.map = data().deepCopy(fields()[1].schema(), other.map); + fieldSetFlags()[1] = other.fieldSetFlags()[1]; + } + if (isValidValue(fields()[2], other.nested_arr)) { + this.nested_arr = data().deepCopy(fields()[2].schema(), other.nested_arr); + fieldSetFlags()[2] = other.fieldSetFlags()[2]; + } + if (isValidValue(fields()[3], other.nested_map)) { + this.nested_map = data().deepCopy(fields()[3].schema(), other.nested_map); + fieldSetFlags()[3] = other.fieldSetFlags()[3]; + } + } + + /** + * Creates a Builder by copying an existing TestRecordWithMapsAndArrays instance + * + * @param other The existing instance to copy. + */ + private Builder(TestRecordWithMapsAndArrays other) { + super(SCHEMA$, MODEL$); + if (isValidValue(fields()[0], other.arr)) { + this.arr = data().deepCopy(fields()[0].schema(), other.arr); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.map)) { + this.map = data().deepCopy(fields()[1].schema(), other.map); + fieldSetFlags()[1] = true; + } + if (isValidValue(fields()[2], other.nested_arr)) { + this.nested_arr = data().deepCopy(fields()[2].schema(), other.nested_arr); + fieldSetFlags()[2] = true; + } + if (isValidValue(fields()[3], other.nested_map)) { + this.nested_map = data().deepCopy(fields()[3].schema(), other.nested_map); + fieldSetFlags()[3] = true; + } + } + + /** + * Gets the value of the 'arr' field. + * + * @return The value. + */ + public java.util.List getArr() { + return arr; + } + + /** + * Sets the value of the 'arr' field. + * + * @param value The value of 'arr'. + * @return This builder. + */ + public Builder setArr(java.util.List value) { + validate(fields()[0], value); + this.arr = value; + fieldSetFlags()[0] = true; + return this; + } + + /** + * Checks whether the 'arr' field has been set. + * + * @return True if the 'arr' field has been set, false otherwise. + */ + public boolean hasArr() { + return fieldSetFlags()[0]; + } + + /** + * Clears the value of the 'arr' field. + * + * @return This builder. + */ + public Builder clearArr() { + arr = null; + fieldSetFlags()[0] = false; + return this; + } + + /** + * Gets the value of the 'map' field. + * + * @return The value. + */ + public java.util.Map getMap() { + return map; + } + + /** + * Sets the value of the 'map' field. + * + * @param value The value of 'map'. + * @return This builder. + */ + public Builder setMap(java.util.Map value) { + validate(fields()[1], value); + this.map = value; + fieldSetFlags()[1] = true; + return this; + } + + /** + * Checks whether the 'map' field has been set. + * + * @return True if the 'map' field has been set, false otherwise. + */ + public boolean hasMap() { + return fieldSetFlags()[1]; + } + + /** + * Clears the value of the 'map' field. + * + * @return This builder. + */ + public Builder clearMap() { + map = null; + fieldSetFlags()[1] = false; + return this; + } + + /** + * Gets the value of the 'nested_arr' field. + * + * @return The value. + */ + public java.util.List> getNestedArr() { + return nested_arr; + } + + /** + * Sets the value of the 'nested_arr' field. + * + * @param value The value of 'nested_arr'. + * @return This builder. + */ + public Builder setNestedArr(java.util.List> value) { + validate(fields()[2], value); + this.nested_arr = value; + fieldSetFlags()[2] = true; + return this; + } + + /** + * Checks whether the 'nested_arr' field has been set. + * + * @return True if the 'nested_arr' field has been set, false otherwise. + */ + public boolean hasNestedArr() { + return fieldSetFlags()[2]; + } + + /** + * Clears the value of the 'nested_arr' field. + * + * @return This builder. + */ + public Builder clearNestedArr() { + nested_arr = null; + fieldSetFlags()[2] = false; + return this; + } + + /** + * Gets the value of the 'nested_map' field. + * + * @return The value. + */ + public java.util.Map> getNestedMap() { + return nested_map; + } + + /** + * Sets the value of the 'nested_map' field. + * + * @param value The value of 'nested_map'. + * @return This builder. + */ + public Builder setNestedMap(java.util.Map> value) { + validate(fields()[3], value); + this.nested_map = value; + fieldSetFlags()[3] = true; + return this; + } + + /** + * Checks whether the 'nested_map' field has been set. + * + * @return True if the 'nested_map' field has been set, false otherwise. + */ + public boolean hasNestedMap() { + return fieldSetFlags()[3]; + } + + /** + * Clears the value of the 'nested_map' field. + * + * @return This builder. + */ + public Builder clearNestedMap() { + nested_map = null; + fieldSetFlags()[3] = false; + return this; + } + + @Override + @SuppressWarnings("unchecked") + public TestRecordWithMapsAndArrays build() { + try { + TestRecordWithMapsAndArrays record = new TestRecordWithMapsAndArrays(); + record.arr = fieldSetFlags()[0] ? this.arr : (java.util.List) defaultValue(fields()[0]); + record.map = fieldSetFlags()[1] ? this.map : (java.util.Map) defaultValue(fields()[1]); + record.nested_arr = fieldSetFlags()[2] ? this.nested_arr + : (java.util.List>) defaultValue(fields()[2]); + record.nested_map = fieldSetFlags()[3] ? this.nested_map + : (java.util.Map>) defaultValue(fields()[3]); + return record; + } catch (org.apache.avro.AvroMissingFieldException e) { + throw e; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumWriter WRITER$ = (org.apache.avro.io.DatumWriter) MODEL$ + .createDatumWriter(SCHEMA$); + + @Override + public void writeExternal(java.io.ObjectOutput out) throws java.io.IOException { + WRITER$.write(this, SpecificData.getEncoder(out)); + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumReader READER$ = (org.apache.avro.io.DatumReader) MODEL$ + .createDatumReader(SCHEMA$); + + @Override + public void readExternal(java.io.ObjectInput in) throws java.io.IOException { + READER$.read(this, SpecificData.getDecoder(in)); + } + + @Override + protected boolean hasCustomCoders() { + return true; + } + + @Override + public void customEncode(org.apache.avro.io.Encoder out) throws java.io.IOException { + long size0 = this.arr.size(); + out.writeArrayStart(); + out.setItemCount(size0); + long actualSize0 = 0; + for (String e0 : this.arr) { + actualSize0++; + out.startItem(); + out.writeString(e0); + } + out.writeArrayEnd(); + if (actualSize0 != size0) + throw new java.util.ConcurrentModificationException( + "Array-size written was " + size0 + ", but element count was " + actualSize0 + "."); + + long size1 = this.map.size(); + out.writeMapStart(); + out.setItemCount(size1); + long actualSize1 = 0; + for (java.util.Map.Entry e1 : this.map.entrySet()) { + actualSize1++; + out.startItem(); + out.writeString(e1.getKey()); + Long v1 = e1.getValue(); + out.writeLong(v1); + } + out.writeMapEnd(); + if (actualSize1 != size1) + throw new java.util.ConcurrentModificationException( + "Map-size written was " + size1 + ", but element count was " + actualSize1 + "."); + + long size2 = this.nested_arr.size(); + out.writeArrayStart(); + out.setItemCount(size2); + long actualSize2 = 0; + for (java.util.List e2 : this.nested_arr) { + actualSize2++; + out.startItem(); + long size3 = e2.size(); + out.writeArrayStart(); + out.setItemCount(size3); + long actualSize3 = 0; + for (String e3 : e2) { + actualSize3++; + out.startItem(); + out.writeString(e3); + } + out.writeArrayEnd(); + if (actualSize3 != size3) + throw new java.util.ConcurrentModificationException( + "Array-size written was " + size3 + ", but element count was " + actualSize3 + "."); + } + out.writeArrayEnd(); + if (actualSize2 != size2) + throw new java.util.ConcurrentModificationException( + "Array-size written was " + size2 + ", but element count was " + actualSize2 + "."); + + long size4 = this.nested_map.size(); + out.writeMapStart(); + out.setItemCount(size4); + long actualSize4 = 0; + for (java.util.Map.Entry> e4 : this.nested_map.entrySet()) { + actualSize4++; + out.startItem(); + out.writeString(e4.getKey()); + java.util.Map v4 = e4.getValue(); + long size5 = v4.size(); + out.writeMapStart(); + out.setItemCount(size5); + long actualSize5 = 0; + for (java.util.Map.Entry e5 : v4.entrySet()) { + actualSize5++; + out.startItem(); + out.writeString(e5.getKey()); + Long v5 = e5.getValue(); + out.writeLong(v5); + } + out.writeMapEnd(); + if (actualSize5 != size5) + throw new java.util.ConcurrentModificationException( + "Map-size written was " + size5 + ", but element count was " + actualSize5 + "."); + } + out.writeMapEnd(); + if (actualSize4 != size4) + throw new java.util.ConcurrentModificationException( + "Map-size written was " + size4 + ", but element count was " + actualSize4 + "."); + + } + + @Override + public void customDecode(org.apache.avro.io.ResolvingDecoder in) throws java.io.IOException { + org.apache.avro.Schema.Field[] fieldOrder = in.readFieldOrderIfDiff(); + if (fieldOrder == null) { + long size0 = in.readArrayStart(); + java.util.List a0 = this.arr; + if (a0 == null) { + a0 = new SpecificData.Array((int) size0, SCHEMA$.getField("arr").schema()); + this.arr = a0; + } else + a0.clear(); + SpecificData.Array ga0 = (a0 instanceof SpecificData.Array ? (SpecificData.Array) a0 : null); + for (; 0 < size0; size0 = in.arrayNext()) { + for (; size0 != 0; size0--) { + String e0 = (ga0 != null ? ga0.peek() : null); + e0 = in.readString(); + a0.add(e0); + } + } + + long size1 = in.readMapStart(); + java.util.Map m1 = this.map; // Need fresh name due to limitation of macro system + if (m1 == null) { + m1 = new java.util.HashMap((int) size1); + this.map = m1; + } else + m1.clear(); + for (; 0 < size1; size1 = in.mapNext()) { + for (; size1 != 0; size1--) { + String k1 = null; + k1 = in.readString(); + Long v1 = null; + v1 = in.readLong(); + m1.put(k1, v1); + } + } + + long size2 = in.readArrayStart(); + java.util.List> a2 = this.nested_arr; + if (a2 == null) { + a2 = new SpecificData.Array>((int) size2, SCHEMA$.getField("nested_arr").schema()); + this.nested_arr = a2; + } else + a2.clear(); + SpecificData.Array> ga2 = (a2 instanceof SpecificData.Array + ? (SpecificData.Array>) a2 + : null); + for (; 0 < size2; size2 = in.arrayNext()) { + for (; size2 != 0; size2--) { + java.util.List e2 = (ga2 != null ? ga2.peek() : null); + long size3 = in.readArrayStart(); + java.util.List a3 = e2; + if (a3 == null) { + a3 = new SpecificData.Array((int) size3, SCHEMA$.getField("nested_arr").schema().getElementType()); + e2 = a3; + } else + a3.clear(); + SpecificData.Array ga3 = (a3 instanceof SpecificData.Array ? (SpecificData.Array) a3 : null); + for (; 0 < size3; size3 = in.arrayNext()) { + for (; size3 != 0; size3--) { + String e3 = (ga3 != null ? ga3.peek() : null); + e3 = in.readString(); + a3.add(e3); + } + } + a2.add(e2); + } + } + + long size4 = in.readMapStart(); + java.util.Map> m4 = this.nested_map; // Need fresh name due to limitation of + // macro system + if (m4 == null) { + m4 = new java.util.HashMap>((int) size4); + this.nested_map = m4; + } else + m4.clear(); + for (; 0 < size4; size4 = in.mapNext()) { + for (; size4 != 0; size4--) { + String k4 = null; + k4 = in.readString(); + java.util.Map v4 = null; + long size5 = in.readMapStart(); + java.util.Map m5 = v4; // Need fresh name due to limitation of macro system + if (m5 == null) { + m5 = new java.util.HashMap((int) size5); + v4 = m5; + } else + m5.clear(); + for (; 0 < size5; size5 = in.mapNext()) { + for (; size5 != 0; size5--) { + String k5 = null; + k5 = in.readString(); + Long v5 = null; + v5 = in.readLong(); + m5.put(k5, v5); + } + } + m4.put(k4, v4); + } + } + + } else { + for (int i = 0; i < 4; i++) { + switch (fieldOrder[i].pos()) { + case 0: + long size0 = in.readArrayStart(); + java.util.List a0 = this.arr; + if (a0 == null) { + a0 = new SpecificData.Array((int) size0, SCHEMA$.getField("arr").schema()); + this.arr = a0; + } else + a0.clear(); + SpecificData.Array ga0 = (a0 instanceof SpecificData.Array ? (SpecificData.Array) a0 : null); + for (; 0 < size0; size0 = in.arrayNext()) { + for (; size0 != 0; size0--) { + String e0 = (ga0 != null ? ga0.peek() : null); + e0 = in.readString(); + a0.add(e0); + } + } + break; + + case 1: + long size1 = in.readMapStart(); + java.util.Map m1 = this.map; // Need fresh name due to limitation of macro system + if (m1 == null) { + m1 = new java.util.HashMap((int) size1); + this.map = m1; + } else + m1.clear(); + for (; 0 < size1; size1 = in.mapNext()) { + for (; size1 != 0; size1--) { + String k1 = null; + k1 = in.readString(); + Long v1 = null; + v1 = in.readLong(); + m1.put(k1, v1); + } + } + break; + + case 2: + long size2 = in.readArrayStart(); + java.util.List> a2 = this.nested_arr; + if (a2 == null) { + a2 = new SpecificData.Array>((int) size2, SCHEMA$.getField("nested_arr").schema()); + this.nested_arr = a2; + } else + a2.clear(); + SpecificData.Array> ga2 = (a2 instanceof SpecificData.Array + ? (SpecificData.Array>) a2 + : null); + for (; 0 < size2; size2 = in.arrayNext()) { + for (; size2 != 0; size2--) { + java.util.List e2 = (ga2 != null ? ga2.peek() : null); + long size3 = in.readArrayStart(); + java.util.List a3 = e2; + if (a3 == null) { + a3 = new SpecificData.Array((int) size3, + SCHEMA$.getField("nested_arr").schema().getElementType()); + e2 = a3; + } else + a3.clear(); + SpecificData.Array ga3 = (a3 instanceof SpecificData.Array ? (SpecificData.Array) a3 + : null); + for (; 0 < size3; size3 = in.arrayNext()) { + for (; size3 != 0; size3--) { + String e3 = (ga3 != null ? ga3.peek() : null); + e3 = in.readString(); + a3.add(e3); + } + } + a2.add(e2); + } + } + break; + + case 3: + long size4 = in.readMapStart(); + java.util.Map> m4 = this.nested_map; // Need fresh name due to limitation + // of macro system + if (m4 == null) { + m4 = new java.util.HashMap>((int) size4); + this.nested_map = m4; + } else + m4.clear(); + for (; 0 < size4; size4 = in.mapNext()) { + for (; size4 != 0; size4--) { + String k4 = null; + k4 = in.readString(); + java.util.Map v4 = null; + long size5 = in.readMapStart(); + java.util.Map m5 = v4; // Need fresh name due to limitation of macro system + if (m5 == null) { + m5 = new java.util.HashMap((int) size5); + v4 = m5; + } else + m5.clear(); + for (; 0 < size5; size5 = in.mapNext()) { + for (; size5 != 0; size5--) { + String k5 = null; + k5 = in.readString(); + Long v5 = null; + v5 = in.readLong(); + m5.put(k5, v5); + } + } + m4.put(k4, v4); + } + } + break; + + default: + throw new java.io.IOException("Corrupt ResolvingDecoder."); + } + } + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithoutLogicalTypes.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithoutLogicalTypes.java index f53f1537ffd..2d54638af69 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithoutLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithoutLogicalTypes.java @@ -1,4 +1,4 @@ -/** +/* * Autogenerated by Avro * * DO NOT EDIT DIRECTLY @@ -14,7 +14,7 @@ @org.apache.avro.specific.AvroGenerated public class TestRecordWithoutLogicalTypes extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { - public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse( + public static final org.apache.avro.Schema SCHEMA$ = org.apache.avro.JsonSchemaParser.parseInternal( "{\"type\":\"record\",\"name\":\"TestRecordWithoutLogicalTypes\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[{\"name\":\"b\",\"type\":\"boolean\"},{\"name\":\"i32\",\"type\":\"int\"},{\"name\":\"i64\",\"type\":\"long\"},{\"name\":\"f32\",\"type\":\"float\"},{\"name\":\"f64\",\"type\":\"double\"},{\"name\":\"s\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"d\",\"type\":{\"type\":\"int\",\"logicalType\":\"date\"}},{\"name\":\"t\",\"type\":{\"type\":\"int\",\"logicalType\":\"time-millis\"}},{\"name\":\"ts\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}},{\"name\":\"dec\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":9,\"scale\":2}}]}"); public static org.apache.avro.Schema getClassSchema() { diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java index 12d8ddbcc83..1d9d58b0518 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java @@ -18,7 +18,7 @@ package org.apache.avro.specific; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -35,8 +35,8 @@ import org.apache.avro.io.DatumWriter; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /* * If integerClass is primitive, reflection to find method will @@ -47,7 +47,7 @@ public class TestSpecificData { private Class intClass; private Class integerClass; - @Before + @BeforeEach public void setUp() { Schema intSchema = Schema.create(Type.INT); intClass = SpecificData.get().getClass(intSchema); @@ -57,29 +57,33 @@ public void setUp() { } @Test - public void testClassTypes() { + void classTypes() { assertTrue(intClass.isPrimitive()); assertFalse(integerClass.isPrimitive()); } @Test - public void testPrimitiveParam() throws Exception { + void primitiveParam() throws Exception { assertNotNull(Reflection.class.getMethod("primitive", intClass)); } - @Test(expected = NoSuchMethodException.class) - public void testPrimitiveParamError() throws Exception { - Reflection.class.getMethod("primitiveWrapper", intClass); + @Test + void primitiveParamError() throws Exception { + assertThrows(NoSuchMethodException.class, () -> { + Reflection.class.getMethod("primitiveWrapper", intClass); + }); } @Test - public void testPrimitiveWrapperParam() throws Exception { + void primitiveWrapperParam() throws Exception { assertNotNull(Reflection.class.getMethod("primitiveWrapper", integerClass)); } - @Test(expected = NoSuchMethodException.class) - public void testPrimitiveWrapperParamError() throws Exception { - Reflection.class.getMethod("primitive", integerClass); + @Test + void primitiveWrapperParamError() throws Exception { + assertThrows(NoSuchMethodException.class, () -> { + Reflection.class.getMethod("primitive", integerClass); + }); } static class Reflection { @@ -136,7 +140,7 @@ public Schema getSchema() { } @Test - public void testSpecificRecordBase() { + void specificRecordBase() { final TestRecord record = new TestRecord(); record.put("x", 1); record.put("y", "str"); @@ -145,7 +149,7 @@ public void testSpecificRecordBase() { } @Test - public void testExternalizeable() throws Exception { + void externalizeable() throws Exception { final TestRecord before = new TestRecord(); before.put("x", 1); before.put("y", "str"); @@ -162,7 +166,7 @@ public void testExternalizeable() throws Exception { /** Tests that non Stringable datum are rejected by specific writers. */ @Test - public void testNonStringable() throws Exception { + void nonStringable() throws Exception { final Schema string = Schema.create(Type.STRING); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final Encoder encoder = EncoderFactory.get().directBinaryEncoder(baos, null); @@ -174,4 +178,30 @@ public void testNonStringable() throws Exception { // Expected error } } + + @Test + void testToByteArray() throws Exception { + final Schema string = Schema.create(Type.STRING); + final DatumWriter writer = new SpecificDatumWriter<>(string); + + try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + final Encoder encoder = EncoderFactory.get().directBinaryEncoder(baos, null); + writer.write("test", encoder); + + final byte[] bytes = writer.toByteArray("test"); + assertArrayEquals(baos.toByteArray(), bytes); + } + } + + @Test + void classNameContainingReservedWords() { + final Schema schema = Schema.createRecord("AnyName", null, "db.public.table", false); + + assertEquals("db.public$.table.AnyName", SpecificData.getClassName(schema)); + } + + @Test + void testCanGetClassOfMangledType() { + assertEquals("org.apache.avro.specific.int$", SpecificData.getClassName(int$.getClassSchema())); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificDatumReader.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificDatumReader.java new file mode 100644 index 00000000000..3c10b74cde1 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificDatumReader.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.specific; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.util.Utf8; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestSpecificDatumReader { + + @Test + void readMyData() throws IOException { + // Check that method newInstanceFromString from SpecificDatumReader extension is + // called. + final EncoderFactory e_factory = new EncoderFactory().configureBufferSize(30); + final DecoderFactory factory = new DecoderFactory().configureDecoderBufferSize(30); + + final MyReader reader = new MyReader(); + reader.setExpected(Schema.create(Schema.Type.STRING)); + reader.setSchema(Schema.create(Schema.Type.STRING)); + + final ByteArrayOutputStream out = new ByteArrayOutputStream(30); + final BinaryEncoder encoder = e_factory.binaryEncoder(out, null); + encoder.writeString(new Utf8("Hello")); + encoder.flush(); + + final BinaryDecoder decoder = factory.binaryDecoder(out.toByteArray(), null); + reader.getData().setFastReaderEnabled(false); + final MyData read = reader.read(null, decoder); + Assertions.assertNotNull(read, "MyReader.newInstanceFromString was not called"); + Assertions.assertEquals("Hello", read.getContent()); + } + + public static class MyData { + private final String content; + + public MyData(String content) { + this.content = content; + } + + public String getContent() { + return content; + } + } + + public static class MyReader extends SpecificDatumReader { + + @Override + protected Class findStringClass(Schema schema) { + return MyData.class; + } + + @Override + protected Object newInstanceFromString(Class c, String s) { + return new MyData(s); + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificRecordWithUnion.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificRecordWithUnion.java new file mode 100644 index 00000000000..70f3e7ac90a --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificRecordWithUnion.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.specific; + +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; + +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.Decoder; + +import org.apache.avro.util.ClassSecurityValidator; +import org.apache.avro.util.ClassSecurityValidator.ClassSecurityPredicate; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.math.BigDecimal; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; + +public class TestSpecificRecordWithUnion { + /** + * Test that the deserialization of a class that is not trusted throws a + * SecurityException. + */ + @Test + void testNotSerializableClasses() throws IOException { + final TestUnionRecord record = TestUnionRecord.newBuilder().setAmount(BigDecimal.ZERO).build(); + final Schema schema = SchemaBuilder.unionOf().nullType().and().type(record.getSchema()).endUnion(); + + byte[] recordBytes = serializeRecord( + "{ \"org.apache.avro.specific.TestUnionRecord\": { \"amount\": { \"bytes\": \"\\u0000\" } } }", schema); + + ClassSecurityPredicate originalValidator = ClassSecurityValidator.getGlobal(); + try { + ClassSecurityValidator.setGlobal(ClassSecurityValidator.builder().build()); + assertThrows(SecurityException.class, () -> deserializeRecord(schema, recordBytes)); + } finally { + ClassSecurityValidator.setGlobal(originalValidator); + } + } + + @Test + void unionLogicalDecimalConversion() throws IOException { + final TestUnionRecord record = TestUnionRecord.newBuilder().setAmount(BigDecimal.ZERO).build(); + final Schema schema = SchemaBuilder.unionOf().nullType().and().type(record.getSchema()).endUnion(); + + byte[] recordBytes = serializeRecord( + "{ \"org.apache.avro.specific.TestUnionRecord\": { \"amount\": { \"bytes\": \"\\u0000\" } } }", schema); + + assertEquals(record, deserializeRecord(schema, recordBytes)); + } + + private static SpecificRecord deserializeRecord(Schema schema, byte[] recordBytes) throws IOException { + SpecificDatumReader specificDatumReader = new SpecificDatumReader<>(schema); + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(recordBytes); + Decoder decoder = DecoderFactory.get().binaryDecoder(byteArrayInputStream, null); + return specificDatumReader.read(null, decoder); + } + + public static byte[] serializeRecord(String value, Schema schema) throws IOException { + DatumReader reader = new GenericDatumReader<>(schema); + Object object = reader.read(null, DecoderFactory.get().jsonDecoder(schema, value)); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(out, null); + DatumWriter writer = new GenericDatumWriter<>(schema); + writer.write(object, encoder); + encoder.flush(); + byte[] bytes = out.toByteArray(); + out.close(); + return bytes; + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificToFromByteArray.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificToFromByteArray.java index 9bf40059bf4..f81dde37407 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificToFromByteArray.java +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificToFromByteArray.java @@ -20,9 +20,12 @@ import org.apache.avro.Conversions; import org.apache.avro.LogicalTypes; import org.apache.avro.message.MissingSchemaException; -import org.junit.Test; - +import org.junit.jupiter.api.Test; import java.io.IOException; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + import java.math.BigDecimal; import java.nio.ByteBuffer; import java.time.Instant; @@ -30,12 +33,10 @@ import java.time.LocalTime; import java.time.temporal.ChronoUnit; -import static org.junit.Assert.assertEquals; - public class TestSpecificToFromByteArray { @Test - public void testSpecificToFromByteBufferWithLogicalTypes() throws IOException { + void specificToFromByteBufferWithLogicalTypes() throws IOException { // Java 9+ comes with NANO precision and since we encode it using millis // precision // Therefore we won't want to have NANOs in the input @@ -43,7 +44,7 @@ public void testSpecificToFromByteBufferWithLogicalTypes() throws IOException { Instant instant = Instant.now().truncatedTo(ChronoUnit.MILLIS); final TestRecordWithLogicalTypes record = new TestRecordWithLogicalTypes(true, 34, 35L, 3.14F, 3019.34, null, - LocalDate.now(), t, instant, new BigDecimal("123.45")); + LocalDate.now(), t, instant, new BigDecimal("123.45"), new BigDecimal(-23.456562323)); final ByteBuffer b = record.toByteBuffer(); final TestRecordWithLogicalTypes copy = TestRecordWithLogicalTypes.fromByteBuffer(b); @@ -52,7 +53,7 @@ public void testSpecificToFromByteBufferWithLogicalTypes() throws IOException { } @Test - public void testSpecificToFromByteBufferWithoutLogicalTypes() throws IOException { + void specificToFromByteBufferWithoutLogicalTypes() throws IOException { final TestRecordWithoutLogicalTypes record = new TestRecordWithoutLogicalTypes(true, 34, 35L, 3.14F, 3019.34, null, (int) System.currentTimeMillis() / 1000, (int) System.currentTimeMillis() / 1000, System.currentTimeMillis(), new Conversions.DecimalConversion().toBytes(new BigDecimal("123.45"), null, LogicalTypes.decimal(9, 2))); @@ -63,23 +64,28 @@ public void testSpecificToFromByteBufferWithoutLogicalTypes() throws IOException assertEquals(record, copy); } - @Test(expected = MissingSchemaException.class) - public void testSpecificByteArrayIncompatibleWithLogicalTypes() throws IOException { - final TestRecordWithoutLogicalTypes withoutLogicalTypes = new TestRecordWithoutLogicalTypes(true, 34, 35L, 3.14F, - 3019.34, null, (int) System.currentTimeMillis() / 1000, (int) System.currentTimeMillis() / 1000, - System.currentTimeMillis(), - new Conversions.DecimalConversion().toBytes(new BigDecimal("123.45"), null, LogicalTypes.decimal(9, 2))); + @Test + void specificByteArrayIncompatibleWithLogicalTypes() throws IOException { + assertThrows(MissingSchemaException.class, () -> { + final TestRecordWithoutLogicalTypes withoutLogicalTypes = new TestRecordWithoutLogicalTypes(true, 34, 35L, 3.14F, + 3019.34, null, (int) System.currentTimeMillis() / 1000, (int) System.currentTimeMillis() / 1000, + System.currentTimeMillis(), + new Conversions.DecimalConversion().toBytes(new BigDecimal("123.45"), null, LogicalTypes.decimal(9, 2))); - final ByteBuffer b = withoutLogicalTypes.toByteBuffer(); - TestRecordWithLogicalTypes.fromByteBuffer(b); + final ByteBuffer b = withoutLogicalTypes.toByteBuffer(); + TestRecordWithLogicalTypes.fromByteBuffer(b); + }); } - @Test(expected = MissingSchemaException.class) - public void testSpecificByteArrayIncompatibleWithoutLogicalTypes() throws IOException { - final TestRecordWithLogicalTypes withLogicalTypes = new TestRecordWithLogicalTypes(true, 34, 35L, 3.14F, 3019.34, - null, LocalDate.now(), LocalTime.now(), Instant.now(), new BigDecimal("123.45")); + @Test + void specificByteArrayIncompatibleWithoutLogicalTypes() throws IOException { + assertThrows(MissingSchemaException.class, () -> { + final TestRecordWithLogicalTypes withLogicalTypes = new TestRecordWithLogicalTypes(true, 34, 35L, 3.14F, 3019.34, + null, LocalDate.now(), LocalTime.now(), Instant.now(), new BigDecimal("123.45"), + new BigDecimal(-23.456562323)); - final ByteBuffer b = withLogicalTypes.toByteBuffer(); - TestRecordWithoutLogicalTypes.fromByteBuffer(b); + final ByteBuffer b = withLogicalTypes.toByteBuffer(); + TestRecordWithoutLogicalTypes.fromByteBuffer(b); + }); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestUnionRecord.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestUnionRecord.java new file mode 100644 index 00000000000..88ffc7fc7eb --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestUnionRecord.java @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.specific; + +import org.apache.avro.message.BinaryMessageDecoder; +import org.apache.avro.message.BinaryMessageEncoder; +import org.apache.avro.message.SchemaStore; + +@SuppressWarnings("all") +@AvroGenerated +public class TestUnionRecord extends SpecificRecordBase implements SpecificRecord { + private static final long serialVersionUID = -3829374192747523457L; + + public static final org.apache.avro.Schema SCHEMA$ = org.apache.avro.JsonSchemaParser.parseInternal( + "{\"type\":\"record\",\"name\":\"TestUnionRecord\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[{\"name\":\"amount\",\"type\":[\"null\",{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":31,\"scale\":8}],\"default\":null}]}"); + + public static org.apache.avro.Schema getClassSchema() { + return SCHEMA$; + } + + private static final SpecificData MODEL$ = new SpecificData(); + static { + MODEL$.addLogicalTypeConversion(new org.apache.avro.Conversions.DecimalConversion()); + } + + private static final BinaryMessageEncoder ENCODER = new BinaryMessageEncoder(MODEL$, + SCHEMA$); + + private static final BinaryMessageDecoder DECODER = new BinaryMessageDecoder(MODEL$, + SCHEMA$); + + /** + * Return the BinaryMessageEncoder instance used by this class. + * + * @return the message encoder used by this class + */ + public static BinaryMessageEncoder getEncoder() { + return ENCODER; + } + + /** + * Return the BinaryMessageDecoder instance used by this class. + * + * @return the message decoder used by this class + */ + public static BinaryMessageDecoder getDecoder() { + return DECODER; + } + + /** + * Create a new BinaryMessageDecoder instance for this class that uses the + * specified {@link SchemaStore}. + * + * @param resolver a {@link SchemaStore} used to find schemas by fingerprint + * @return a BinaryMessageDecoder instance for this class backed by the given + * SchemaStore + */ + public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { + return new BinaryMessageDecoder(MODEL$, SCHEMA$, resolver); + } + + /** + * Serializes this TestUnionRecord to a ByteBuffer. + * + * @return a buffer holding the serialized data for this instance + * @throws java.io.IOException if this instance could not be serialized + */ + public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { + return ENCODER.encode(this); + } + + /** + * Deserializes a TestUnionRecord from a ByteBuffer. + * + * @param b a byte buffer holding serialized data for an instance of this class + * @return a TestUnionRecord instance decoded from the given buffer + * @throws java.io.IOException if the given bytes could not be deserialized into + * an instance of this class + */ + public static TestUnionRecord fromByteBuffer(java.nio.ByteBuffer b) throws java.io.IOException { + return DECODER.decode(b); + } + + private java.math.BigDecimal amount; + + /** + * Default constructor. Note that this does not initialize fields to their + * default values from the schema. If that is desired then one should use + * newBuilder(). + */ + public TestUnionRecord() { + } + + /** + * All-args constructor. + * + * @param amount The new value for amount + */ + public TestUnionRecord(java.math.BigDecimal amount) { + this.amount = amount; + } + + @Override + public SpecificData getSpecificData() { + return MODEL$; + } + + @Override + public org.apache.avro.Schema getSchema() { + return SCHEMA$; + } + + // Used by DatumWriter. Applications should not call. + @Override + public Object get(int field$) { + switch (field$) { + case 0: + return amount; + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + // Used by DatumReader. Applications should not call. + @Override + @SuppressWarnings(value = "unchecked") + public void put(int field$, Object value$) { + switch (field$) { + case 0: + amount = (java.math.BigDecimal) value$; + break; + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + /** + * Gets the value of the 'amount' field. + * + * @return The value of the 'amount' field. + */ + public java.math.BigDecimal getAmount() { + return amount; + } + + /** + * Sets the value of the 'amount' field. + * + * @param value the value to set. + */ + public void setAmount(java.math.BigDecimal value) { + this.amount = value; + } + + /** + * Creates a new TestUnionRecord RecordBuilder. + * + * @return A new TestUnionRecord RecordBuilder + */ + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Creates a new TestUnionRecord RecordBuilder by copying an existing Builder. + * + * @param other The existing builder to copy. + * @return A new TestUnionRecord RecordBuilder + */ + public static Builder newBuilder(Builder other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * Creates a new TestUnionRecord RecordBuilder by copying an existing + * TestUnionRecord instance. + * + * @param other The existing instance to copy. + * @return A new TestUnionRecord RecordBuilder + */ + public static Builder newBuilder(TestUnionRecord other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * RecordBuilder for TestUnionRecord instances. + */ + @AvroGenerated + public static class Builder extends SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + private java.math.BigDecimal amount; + + /** Creates a new Builder */ + private Builder() { + super(SCHEMA$, MODEL$); + } + + /** + * Creates a Builder by copying an existing Builder. + * + * @param other The existing Builder to copy. + */ + private Builder(Builder other) { + super(other); + if (isValidValue(fields()[0], other.amount)) { + this.amount = data().deepCopy(fields()[0].schema(), other.amount); + fieldSetFlags()[0] = other.fieldSetFlags()[0]; + } + } + + /** + * Creates a Builder by copying an existing TestUnionRecord instance + * + * @param other The existing instance to copy. + */ + private Builder(TestUnionRecord other) { + super(SCHEMA$, MODEL$); + if (isValidValue(fields()[0], other.amount)) { + this.amount = data().deepCopy(fields()[0].schema(), other.amount); + fieldSetFlags()[0] = true; + } + } + + /** + * Gets the value of the 'amount' field. + * + * @return The value. + */ + public java.math.BigDecimal getAmount() { + return amount; + } + + /** + * Sets the value of the 'amount' field. + * + * @param value The value of 'amount'. + * @return This builder. + */ + public Builder setAmount(java.math.BigDecimal value) { + validate(fields()[0], value); + this.amount = value; + fieldSetFlags()[0] = true; + return this; + } + + /** + * Checks whether the 'amount' field has been set. + * + * @return True if the 'amount' field has been set, false otherwise. + */ + public boolean hasAmount() { + return fieldSetFlags()[0]; + } + + /** + * Clears the value of the 'amount' field. + * + * @return This builder. + */ + public Builder clearAmount() { + amount = null; + fieldSetFlags()[0] = false; + return this; + } + + @Override + @SuppressWarnings("unchecked") + public TestUnionRecord build() { + try { + TestUnionRecord record = new TestUnionRecord(); + record.amount = fieldSetFlags()[0] ? this.amount : (java.math.BigDecimal) defaultValue(fields()[0]); + return record; + } catch (org.apache.avro.AvroMissingFieldException e) { + throw e; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumWriter WRITER$ = (org.apache.avro.io.DatumWriter) MODEL$ + .createDatumWriter(SCHEMA$); + + @Override + public void writeExternal(java.io.ObjectOutput out) throws java.io.IOException { + WRITER$.write(this, SpecificData.getEncoder(out)); + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumReader READER$ = (org.apache.avro.io.DatumReader) MODEL$ + .createDatumReader(SCHEMA$); + + @Override + public void readExternal(java.io.ObjectInput in) throws java.io.IOException { + READER$.read(this, SpecificData.getDecoder(in)); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/int$.java b/lang/java/avro/src/test/java/org/apache/avro/specific/int$.java new file mode 100644 index 00000000000..2a1a1470f8f --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/int$.java @@ -0,0 +1,229 @@ +/* + * Autogenerated by Avro + * + * DO NOT EDIT DIRECTLY + */ +package org.apache.avro.specific; + +import com.fasterxml.jackson.databind.jsonschema.JsonSchema; +import org.apache.avro.JsonSchemaParser; +import org.apache.avro.message.BinaryMessageDecoder; +import org.apache.avro.message.BinaryMessageEncoder; +import org.apache.avro.message.SchemaStore; + +@AvroGenerated +public class int$ extends SpecificRecordBase implements SpecificRecord { + private static final long serialVersionUID = 3003385205621277651L; + + public static final org.apache.avro.Schema SCHEMA$ = JsonSchemaParser + .parseInternal("{\"type\":\"record\",\"name\":\"int\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[]}"); + + public static org.apache.avro.Schema getClassSchema() { + return SCHEMA$; + } + + private static final SpecificData MODEL$ = new SpecificData(); + + private static final BinaryMessageEncoder ENCODER = new BinaryMessageEncoder<>(MODEL$, SCHEMA$); + + private static final BinaryMessageDecoder DECODER = new BinaryMessageDecoder<>(MODEL$, SCHEMA$); + + /** + * Return the BinaryMessageEncoder instance used by this class. + * + * @return the message encoder used by this class + */ + public static BinaryMessageEncoder getEncoder() { + return ENCODER; + } + + /** + * Return the BinaryMessageDecoder instance used by this class. + * + * @return the message decoder used by this class + */ + public static BinaryMessageDecoder getDecoder() { + return DECODER; + } + + /** + * Create a new BinaryMessageDecoder instance for this class that uses the + * specified {@link SchemaStore}. + * + * @param resolver a {@link SchemaStore} used to find schemas by fingerprint + * @return a BinaryMessageDecoder instance for this class backed by the given + * SchemaStore + */ + public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { + return new BinaryMessageDecoder<>(MODEL$, SCHEMA$, resolver); + } + + /** + * Serializes this int to a ByteBuffer. + * + * @return a buffer holding the serialized data for this instance + * @throws java.io.IOException if this instance could not be serialized + */ + public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { + return ENCODER.encode(this); + } + + /** + * Deserializes a int from a ByteBuffer. + * + * @param b a byte buffer holding serialized data for an instance of this class + * @return a int instance decoded from the given buffer + * @throws java.io.IOException if the given bytes could not be deserialized into + * an instance of this class + */ + public static int$ fromByteBuffer(java.nio.ByteBuffer b) throws java.io.IOException { + return DECODER.decode(b); + } + + public SpecificData getSpecificData() { + return MODEL$; + } + + public org.apache.avro.Schema getSchema() { + return SCHEMA$; + } + + // Used by DatumWriter. Applications should not call. + public Object get(int field$) { + switch (field$) { + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + // Used by DatumReader. Applications should not call. + @SuppressWarnings(value = "unchecked") + public void put(int field$, Object value$) { + switch (field$) { + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + /** + * Creates a new int$ RecordBuilder. + * + * @return A new int$ RecordBuilder + */ + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Creates a new int$ RecordBuilder by copying an existing Builder. + * + * @param other The existing builder to copy. + * @return A new int$ RecordBuilder + */ + public static Builder newBuilder(Builder other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * Creates a new int$ RecordBuilder by copying an existing int$ instance. + * + * @param other The existing instance to copy. + * @return A new int$ RecordBuilder + */ + public static Builder newBuilder(int$ other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * RecordBuilder for int$ instances. + */ + @AvroGenerated + public static class Builder extends SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + /** Creates a new Builder */ + private Builder() { + super(SCHEMA$, MODEL$); + } + + /** + * Creates a Builder by copying an existing Builder. + * + * @param other The existing Builder to copy. + */ + private Builder(Builder other) { + super(other); + } + + /** + * Creates a Builder by copying an existing int$ instance + * + * @param other The existing instance to copy. + */ + private Builder(int$ other) { + super(SCHEMA$, MODEL$); + } + + @Override + @SuppressWarnings("unchecked") + public int$ build() { + try { + int$ record = new int$(); + return record; + } catch (org.apache.avro.AvroMissingFieldException e) { + throw e; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumWriter WRITER$ = (org.apache.avro.io.DatumWriter) MODEL$ + .createDatumWriter(SCHEMA$); + + @Override + public void writeExternal(java.io.ObjectOutput out) throws java.io.IOException { + WRITER$.write(this, SpecificData.getEncoder(out)); + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumReader READER$ = (org.apache.avro.io.DatumReader) MODEL$ + .createDatumReader(SCHEMA$); + + @Override + public void readExternal(java.io.ObjectInput in) throws java.io.IOException { + READER$.read(this, SpecificData.getDecoder(in)); + } + + @Override + protected boolean hasCustomCoders() { + return true; + } + + @Override + public void customEncode(org.apache.avro.io.Encoder out) throws java.io.IOException { + } + + @Override + public void customDecode(org.apache.avro.io.ResolvingDecoder in) throws java.io.IOException { + org.apache.avro.Schema.Field[] fieldOrder = in.readFieldOrderIfDiff(); + if (fieldOrder == null) { + } else { + for (int i = 0; i < 0; i++) { + switch (fieldOrder[i].pos()) { + default: + throw new java.io.IOException("Corrupt ResolvingDecoder."); + } + } + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/NonCopyingByteArrayOutputStreamTest.java b/lang/java/avro/src/test/java/org/apache/avro/util/NonCopyingByteArrayOutputStreamTest.java new file mode 100644 index 00000000000..0919edbd41c --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/NonCopyingByteArrayOutputStreamTest.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.junit.jupiter.api.Test; + +import org.apache.avro.SystemLimitException; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class NonCopyingByteArrayOutputStreamTest { + + /** + * Basic test: write then read. + */ + @Test + public void testDefaultWriteWorks() throws IOException { + NonCopyingByteArrayOutputStream out = new NonCopyingByteArrayOutputStream(1); + out.write('a'); + final byte[] b = "string".getBytes(); + out.write(b, 0, b.length); + out.close(); + final ByteBuffer buffer = out.asByteBuffer(); + assertEquals('a', buffer.get()); + for (byte value : b) { + assertEquals(value, buffer.get()); + } + } + + /** + * Test write limiting. + */ + @Test + public void testLimitedWrite() throws IOException { + NonCopyingByteArrayOutputStream out = NonCopyingByteArrayOutputStream.capacityLimitedOutputStream(1, 4); + out.write('a'); + // it's impossible to go over the limit in a write(bytes) call. + final byte[] b = "longstring".getBytes(); + assertThrows(SystemLimitException.class, () -> out.write(b), + "Buffer size 11 (bytes) exceeds maximum allowed size 4."); + // we can still write up to the limit...the buffer has not been written to yet. + out.write(b, 0, 2); + out.write('z'); + // now at end of file, so another write shall fail. + assertThrows(SystemLimitException.class, () -> out.write('x')); + out.close(); + // validate everything successfully written is there + final ByteBuffer buffer = out.asByteBuffer(); + for (byte value : "aloz".getBytes()) { + assertEquals(value, buffer.get()); + } + } + + @Test + public void testLimitedWriteBytes() { + NonCopyingByteArrayOutputStream out = NonCopyingByteArrayOutputStream.capacityLimitedOutputStream(1, 4); + out.writeBytes("abcd".getBytes()); + assertThrows(SystemLimitException.class, () -> out.writeBytes("e".getBytes())); + } + + @Test + public void testInitialCapacityIsClampedToLimit() throws IOException { + NonCopyingByteArrayOutputStream out = NonCopyingByteArrayOutputStream.capacityLimitedOutputStream(1024, 4); + out.write("abcd".getBytes()); + assertThrows(SystemLimitException.class, () -> out.write('e')); + } + + @Test + public void testInnerLimitCheck() throws Throwable { + assertThrows(SystemLimitException.class, () -> SystemLimitException.checkMaxDecompressCapacity(256L, 0, 100_000)); + SystemLimitException.checkMaxDecompressCapacity(256L, 0, 256); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestCaseFinder.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestCaseFinder.java index 3c8ef0ce7d7..5cc695766d7 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/util/TestCaseFinder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestCaseFinder.java @@ -17,35 +17,24 @@ */ package org.apache.avro.util; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.BufferedReader; import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; -import org.junit.Test; -import org.junit.experimental.runners.Enclosed; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Enclosed.class) public class TestCaseFinder { - @RunWith(Parameterized.class) + @Nested public static class SimpleCases { - String input, label; - List expectedOutput; - - public SimpleCases(String input, String label, Object[][] ex) { - this.input = input; - this.label = label; - this.expectedOutput = Arrays.asList(ex); - } - @Parameters public static List cases() { List result = new ArrayList<>(); result.add(new Object[] { "", "foo", new Object[][] {} }); @@ -70,37 +59,49 @@ public static List cases() { return result; } - @Test - public void testOutput() throws Exception { + @ParameterizedTest + @MethodSource("cases") + void output(String input, String label, Object[][] ex) throws Exception { List result = new ArrayList<>(); CaseFinder.find(mk(input), label, result); - assertTrue(pr(result), eq(result, expectedOutput)); + List expectedOutput = Arrays.asList(ex); + assertTrue(eq(result, expectedOutput), pr(result)); } } - public static class NonParameterized { - @Test(expected = java.lang.IllegalArgumentException.class) - public void testBadDocLabel1() throws Exception { - List result = new ArrayList<>(); - CaseFinder.find(mk("< { + List result = new ArrayList<>(); + CaseFinder.find(mk("< result = new ArrayList<>(); - CaseFinder.find(mk("< { + List result = new ArrayList<>(); + CaseFinder.find(mk("< result = new ArrayList<>(); - CaseFinder.find(mk("< { + List result = new ArrayList<>(); + CaseFinder.find(mk("< result = new ArrayList<>(); - CaseFinder.find(mk("< { + List result = new ArrayList<>(); + CaseFinder.find(mk("< ClassUtils.forName(boolean[][][][][][].class.getName())); + assertDoesNotThrow(() -> ClassUtils.forName("java.lang.String")); + assertDoesNotThrow(() -> ClassUtils.forName(java.math.BigDecimal[][][][].class.getName())); + + // The package "org.apache.avro" is allowed by default for the test environment + assertDoesNotThrow(() -> ClassUtils.forName("org.apache.avro.util.TestClassSecurityValidator$TestInnerClass")); + + // Test a couple of default untrusted classes via ClassUtils + assertThrows(SecurityException.class, () -> ClassUtils.forName("java.net.InetAddress")); + assertThrows(SecurityException.class, () -> ClassUtils.forName("java.io.FileInputStream")); + } + + @Test + void testBuilder() { + ClassSecurityValidator.setGlobal(ClassSecurityValidator.builder().add(TestClassSecurityValidator.class).build()); + + assertDoesNotThrow(() -> ClassUtils.forName("org.apache.avro.util.TestClassSecurityValidator")); + assertThrows(SecurityException.class, + () -> ClassUtils.forName("org.apache.avro.util.TestClassSecurityValidator$TestInnerClass")); + + // Test that arrays and primitives also work + assertDoesNotThrow(() -> ClassUtils.forName(short[][][][][].class.getName())); + assertDoesNotThrow(() -> ClassUtils.forName(TestClassSecurityValidator[][][][].class.getName())); + assertThrows(SecurityException.class, () -> ClassUtils.forName(TestInnerClass[][].class.getName())); + } + + @Test + void testOwnImplementation() { + ClassSecurityValidator.setGlobal(new ClassSecurityPredicate() { + @Override + public boolean isTrusted(Class clazz) { + return clazz.getSimpleName().contains("Inner"); + } + + @Override + public void forbiddenClass(String className) { + throw new SecurityException("Not inner"); + } + }); + assertDoesNotThrow(() -> ClassUtils.forName(TestInnerClass.class.getName())); + Exception e = assertThrows(SecurityException.class, + () -> ClassUtils.forName(TestClassSecurityValidator.class.getName())); + assertEquals("Not inner", e.getMessage()); + + // Test that arrays and primitives also work + assertDoesNotThrow(() -> ClassUtils.forName(char[][][][].class.getName())); + assertDoesNotThrow(() -> ClassUtils.forName(TestInnerClass[][][][].class.getName())); + e = assertThrows(SecurityException.class, () -> ClassUtils.forName(TestClassSecurityValidator[][].class.getName())); + assertEquals("Not inner", e.getMessage()); + } + + @Test + void testClassUtilsEnforcesValidator() { + ClassSecurityValidator.setGlobal(ClassSecurityValidator.builder().add("java.lang.String").build()); + + assertThrows(SecurityException.class, () -> ClassUtils.forName("java.net.URI"), + "ClassUtils.forName should reject classes not in the trusted set"); + + assertDoesNotThrow(() -> ClassUtils.forName("java.lang.String"), + "ClassUtils.forName should allow classes in the trusted set"); + } + + @Test + void testDirectLoadClassDoesNotUseValidator() throws ClassNotFoundException { + ClassSecurityValidator.setGlobal(ClassSecurityValidator.builder().add("java.lang.String").build()); + + ClassLoader cl = Thread.currentThread().getContextClassLoader(); + Class loaded = cl.loadClass("java.net.URI"); + assertNotNull(loaded, "Direct ClassLoader.loadClass() loads any class regardless of the validator"); + + assertThrows(SecurityException.class, () -> ClassUtils.forName("java.net.URI"), + "ClassUtils.forName correctly applies the validator"); + } + + @Test + void testBuildComplexPredicate() { + ClassSecurityValidator.setGlobal(ClassSecurityValidator.composite( + ClassSecurityValidator.builder().add(TestInnerClass.class).add(TestClassSecurityValidator.class).build(), + ClassSecurityValidator.DEFAULT, c -> c.getPackageName().equals("java.lang"))); + + // Test that the defaults work since we included them + testDefault(); + + assertDoesNotThrow(() -> ClassUtils.forName(TestInnerClass.class.getName())); + assertDoesNotThrow(() -> ClassUtils.forName(TestClassSecurityValidator.class.getName())); + assertDoesNotThrow(() -> ClassUtils.forName(StringBuilder.class.getName())); + assertDoesNotThrow(() -> ClassUtils.forName("java.lang.StringBuffer")); + assertDoesNotThrow(() -> ClassUtils.forName(BigInteger.class.getName())); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestRandomData.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestRandomData.java new file mode 100644 index 00000000000..a7f7862ea45 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestRandomData.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.Objects; +import java.util.Random; + +import org.apache.avro.JsonSchemaParser; +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericData; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.specific.SpecificRecordBase; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestRandomData { + private long seed; + + private int count; + + private File file; + private GenericData genericData; + private SpecificData specificData; + private Schema specificSchema; + private ReflectData reflectData; + private Schema reflectedSchema; + + @Before + public void setUp() throws Exception { + file = Files.createTempFile("randomData", ".avro").toFile(); + seed = System.currentTimeMillis(); + count = new Random().nextInt(50) + 75; + + genericData = GenericData.get(); + specificData = SpecificData.get(); + specificSchema = specificData.getSchema(SpecificTestRecord.class); + reflectData = ReflectData.get(); + reflectedSchema = reflectData.getSchema(ReflectTestRecord.class); + } + + @Test + public void testRandomDataFromGenericToGeneric() throws IOException { + checkWrite(genericData, TEST_SCHEMA); + checkRead(genericData, TEST_SCHEMA); + } + + @Test + public void testRandomDataFromGenericToSpecific() throws IOException { + checkWrite(genericData, TEST_SCHEMA); + checkRead(specificData, specificSchema); + } + + @Test + public void testRandomDataFromGenericToReflected() throws IOException { + checkWrite(genericData, TEST_SCHEMA); + checkRead(reflectData, reflectedSchema); + } + + @Test + public void testRandomDataFromSpecificToGeneric() throws IOException { + checkWrite(specificData, specificSchema); + checkRead(genericData, TEST_SCHEMA); + } + + @Test + public void testRandomDataFromSpecificToSpecific() throws IOException { + checkWrite(specificData, specificSchema); + checkRead(specificData, specificSchema); + } + + @Test + public void testRandomDataFromSpecificToReflected() throws IOException { + checkWrite(specificData, specificSchema); + checkRead(reflectData, reflectedSchema); + } + + @Test + public void testRandomDataFromReflectedToGeneric() throws IOException { + checkWrite(reflectData, reflectedSchema); + checkRead(genericData, TEST_SCHEMA); + } + + @Test + public void testRandomDataFromReflectedToSpecific() throws IOException { + checkWrite(reflectData, reflectedSchema); + checkRead(specificData, specificSchema); + } + + @Test + public void testRandomDataFromReflectedToReflected() throws IOException { + checkWrite(reflectData, reflectedSchema); + checkRead(reflectData, reflectedSchema); + } + + private void checkWrite(GenericData genericData, Schema schema) throws IOException { + // noinspection unchecked + try (DataFileWriter writer = new DataFileWriter(genericData.createDatumWriter(schema))) { + writer.create(schema, file); + for (Object datum : new RandomData(genericData, schema, this.count, seed)) { + writer.append(datum); + } + } + } + + private void checkRead(GenericData genericData, Schema schema) throws IOException { + // noinspection unchecked + try (DataFileReader reader = new DataFileReader(file, genericData.createDatumReader(schema))) { + for (Object expected : new RandomData(genericData, schema, this.count, seed)) { + assertEquals(expected, reader.next()); + } + } + } + + /* + * Test classes: they implement the same schema, but one is a SpecificRecord and + * the other uses a reflected schema. + */ + + public static final String TEST_SCHEMA_JSON = "{\"type\":\"record\",\"name\":\"Record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}}]}"; + + public static final Schema TEST_SCHEMA = JsonSchemaParser.parseInternal(TEST_SCHEMA_JSON); + + public static class SpecificTestRecord extends SpecificRecordBase { + public static final Schema SCHEMA$ = JsonSchemaParser.parseInternal(TEST_SCHEMA_JSON.replace("\"name\":\"Record\"", + "\"name\":\"" + SpecificTestRecord.class.getCanonicalName() + "\"")); + private int x; + private String y; + + @Override + public Schema getSchema() { + return SCHEMA$; + } + + @Override + public void put(int i, Object v) { + switch (i) { + case 0: + x = (Integer) v; + break; + case 1: + y = (String) v; + break; + default: + throw new RuntimeException(); + } + } + + @Override + public Object get(int i) { + switch (i) { + case 0: + return x; + case 1: + return y; + } + throw new RuntimeException(); + } + } + + public static class ReflectTestRecord { + private int x; + private String y; + + public int getX() { + return x; + } + + public void setX(int x) { + this.x = x; + } + + public String getY() { + return y; + } + + public void setY(String y) { + this.y = y; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ReflectTestRecord that = (ReflectTestRecord) o; + return x == that.x && Objects.equals(y, that.y); + } + + @Override + public int hashCode() { + return Objects.hash(x, y); + } + + @Override + public String toString() { + return String.format("{\"x\": %d, \"y\": \"%s\"}", x, y); + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemaResolver.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemaResolver.java new file mode 100644 index 00000000000..1a340b7fa5d --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemaResolver.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.junit.Test; + +import java.io.IOException; + +public class TestSchemaResolver { + + @Test + public void testResolving() throws IOException { + // Path testIdl = Paths.get(".", "src", "test", "idl", + // "cycle.avdl").toAbsolutePath(); + // IdlReader parser = new IdlReader(); + // IdlFile idlFile = parser.parse(testIdl); + // Protocol protocol = idlFile.getProtocol(); + // System.out.println(protocol); + // Assert.assertEquals(5, protocol.getTypes().size()); + } + + @Test(expected = IllegalArgumentException.class) + public void testIsUnresolvedSchemaError1() { + // No "org.apache.avro.idl.unresolved.name" property + Schema s = SchemaBuilder.record("R").fields().endRecord(); + SchemaResolver.getUnresolvedSchemaName(s); + } + + @Test(expected = IllegalArgumentException.class) + public void testIsUnresolvedSchemaError2() { + // No "UnresolvedSchema" property + Schema s = SchemaBuilder.record("R").prop("org.apache.avro.idl.unresolved.name", "x").fields().endRecord(); + SchemaResolver.getUnresolvedSchemaName(s); + } + + @Test(expected = IllegalArgumentException.class) + public void testIsUnresolvedSchemaError3() { + // Namespace not "org.apache.avro.compiler". + Schema s = SchemaBuilder.record("UnresolvedSchema").prop("org.apache.avro.idl.unresolved.name", "x").fields() + .endRecord(); + SchemaResolver.getUnresolvedSchemaName(s); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetUnresolvedSchemaNameError() { + Schema s = SchemaBuilder.fixed("a").size(10); + SchemaResolver.getUnresolvedSchemaName(s); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemas.java new file mode 100644 index 00000000000..dc2f34bffff --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemas.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; +import org.junit.Assert; +import org.junit.Test; + +public class TestSchemas { + + private static class TestVisitor implements SchemaVisitor { + StringBuilder sb = new StringBuilder(); + + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal); + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(Schema nonTerminal) { + String n = nonTerminal.getName(); + sb.append(n).append('.'); + if (n.startsWith("t")) { + return SchemaVisitorAction.TERMINATE; + } else if (n.startsWith("ss")) { + return SchemaVisitorAction.SKIP_SIBLINGS; + } else if (n.startsWith("st")) { + return SchemaVisitorAction.SKIP_SUBTREE; + } else { + return SchemaVisitorAction.CONTINUE; + } + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal) { + sb.append("!"); + String n = nonTerminal.getName(); + if (n.startsWith("ct")) { + return SchemaVisitorAction.TERMINATE; + } else if (n.startsWith("css")) { + return SchemaVisitorAction.SKIP_SIBLINGS; + } else if (n.startsWith("cst")) { + return SchemaVisitorAction.SKIP_SUBTREE; + } else { + return SchemaVisitorAction.CONTINUE; + } + } + + @Override + public String get() { + return sb.toString(); + } + } + + @Test + public void testVisit1() { + String s1 = "{\"type\": \"record\", \"name\": \"t1\", \"fields\": [{\"name\": \"f1\", \"type\": \"int\"}]}"; + Assert.assertEquals("t1.", Schemas.visit(SchemaParser.parseSingle(s1), new TestVisitor())); + } + + @Test + public void testVisit2() { + String s2 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [{\"name\": \"f1\", \"type\": \"int\"}]}"; + Assert.assertEquals("c1.\"int\"!", Schemas.visit(SchemaParser.parseSingle(s2), new TestVisitor())); + + } + + @Test + public void testVisit3() { + String s3 = "{\"type\": \"record\", \"name\": \"ss1\", \"fields\": [{\"name\": \"f1\", \"type\": \"int\"}]}"; + Assert.assertEquals("ss1.", Schemas.visit(SchemaParser.parseSingle(s3), new TestVisitor())); + + } + + @Test + public void testVisit4() { + String s4 = "{\"type\": \"record\", \"name\": \"st1\", \"fields\": [{\"name\": \"f1\", \"type\": \"int\"}]}"; + Assert.assertEquals("st1.!", Schemas.visit(SchemaParser.parseSingle(s4), new TestVisitor())); + + } + + @Test + public void testVisit5() { + String s5 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"c2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.c2.\"int\"!\"long\"!", Schemas.visit(SchemaParser.parseSingle(s5), new TestVisitor())); + + } + + @Test + public void testVisit6() { + String s6 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ss2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.ss2.!", Schemas.visit(SchemaParser.parseSingle(s6), new TestVisitor())); + + } + + @Test + public void testVisit7() { + String s7 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"css2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.css2.\"int\"!!", Schemas.visit(SchemaParser.parseSingle(s7), new TestVisitor())); + } + + @Test(expected = UnsupportedOperationException.class) + public void testVisit8() { + String s8 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"cst2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"int\"}" + "]}"; + Schemas.visit(SchemaParser.parseSingle(s8), new TestVisitor()); + } + + @Test + public void testVisit9() { + String s9 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ct2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.ct2.\"int\"!", Schemas.visit(SchemaParser.parseSingle(s9), new TestVisitor())); + } + + @Test(expected = UnsupportedOperationException.class) + public void testVisit10() { + String s10 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ct2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"int\"}" + "]}"; + Schemas.visit(SchemaParser.parseSingle(s10), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + return SchemaVisitorAction.SKIP_SUBTREE; + } + }); + } + + @Test + public void testVisit11() { + String s11 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"c2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"},{\"name\": \"f12\", \"type\": \"double\"}" + "]}}," + + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.c2.\"int\".!\"long\".!", Schemas.visit(SchemaParser.parseSingle(s11), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal).append('.'); + return SchemaVisitorAction.SKIP_SIBLINGS; + } + })); + } + + @Test + public void testVisit12() { + String s12 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ct2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.ct2.\"int\".", Schemas.visit(SchemaParser.parseSingle(s12), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal).append('.'); + return SchemaVisitorAction.TERMINATE; + } + })); + } + + @Test + public void testVisit13() { + String s12 = "{\"type\": \"int\"}"; + Assert.assertEquals("\"int\".", Schemas.visit(SchemaParser.parseSingle(s12), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal).append('.'); + return SchemaVisitorAction.SKIP_SIBLINGS; + } + })); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java index 918465a725c..3e36d9a0214 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java @@ -19,9 +19,7 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.is; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -30,11 +28,13 @@ import java.io.ObjectOutputStream; import java.nio.charset.StandardCharsets; -import org.junit.Test; +import org.apache.avro.SystemLimitException; +import org.apache.avro.TestSystemLimitException; +import org.junit.jupiter.api.Test; public class TestUtf8 { @Test - public void testByteConstructor() throws Exception { + void byteConstructor() throws Exception { byte[] bs = "Foo".getBytes(StandardCharsets.UTF_8); Utf8 u = new Utf8(bs); assertEquals(bs.length, u.getByteLength()); @@ -44,7 +44,7 @@ public void testByteConstructor() throws Exception { } @Test - public void testArrayReusedWhenLargerThanRequestedSize() { + void arrayReusedWhenLargerThanRequestedSize() { byte[] bs = "55555".getBytes(StandardCharsets.UTF_8); Utf8 u = new Utf8(bs); assertEquals(5, u.getByteLength()); @@ -58,48 +58,89 @@ public void testArrayReusedWhenLargerThanRequestedSize() { } @Test - public void testHashCodeReused() { - assertEquals(97, new Utf8("a").hashCode()); - assertEquals(3904, new Utf8("zz").hashCode()); - assertEquals(122, new Utf8("z").hashCode()); - assertEquals(99162322, new Utf8("hello").hashCode()); - assertEquals(3198781, new Utf8("hell").hashCode()); + void hashCodeReused() { + assertEquals(1, new Utf8().hashCode()); + assertEquals(128, new Utf8("a").hashCode()); + assertEquals(4865, new Utf8("zz").hashCode()); + assertEquals(153, new Utf8("z").hashCode()); + assertEquals(127791473, new Utf8("hello").hashCode()); + assertEquals(4122302, new Utf8("hell").hashCode()); Utf8 u = new Utf8("a"); - assertEquals(97, u.hashCode()); - assertEquals(97, u.hashCode()); + assertEquals(128, u.hashCode()); + assertEquals(128, u.hashCode()); u.set("a"); - assertEquals(97, u.hashCode()); + assertEquals(128, u.hashCode()); u.setByteLength(1); - assertEquals(97, u.hashCode()); + assertEquals(128, u.hashCode()); u.setByteLength(2); - assertNotEquals(97, u.hashCode()); + assertNotEquals(128, u.hashCode()); u.set("zz"); - assertEquals(3904, u.hashCode()); + assertEquals(4865, u.hashCode()); u.setByteLength(1); - assertEquals(122, u.hashCode()); + assertEquals(153, u.hashCode()); u.set("hello"); - assertEquals(99162322, u.hashCode()); + assertEquals(127791473, u.hashCode()); u.setByteLength(4); - assertEquals(3198781, u.hashCode()); + assertEquals(4122302, u.hashCode()); u.set(new Utf8("zz")); - assertEquals(3904, u.hashCode()); + assertEquals(4865, u.hashCode()); u.setByteLength(1); - assertEquals(122, u.hashCode()); + assertEquals(153, u.hashCode()); u.set(new Utf8("hello")); - assertEquals(99162322, u.hashCode()); + assertEquals(127791473, u.hashCode()); u.setByteLength(4); - assertEquals(3198781, u.hashCode()); + assertEquals(4122302, u.hashCode()); } + /** + * There are two different code paths that hashcode() can call depending on the + * state of the internal buffer. If the buffer is full (string length is equal + * to buffer length) then the JDK hashcode function can be used. However, if the + * buffer is not full (string length is less than the internal buffer length), + * then the JDK does not support this prior to JDK 23 and a scalar + * implementation is the only option today. This difference can be resolved with + * JDK 23 as it supports both cases. + */ @Test - public void testSerialization() throws IOException, ClassNotFoundException { + void hashCodeBasedOnCapacity() { + // string = 8; buffer = 8 + Utf8 fullCapacity = new Utf8("abcdefgh", 8); + + // string = 8; buffer = 9 + Utf8 partialCapacity = new Utf8("abcdefghX", 8); + + assertEquals(fullCapacity.hashCode(), partialCapacity.hashCode()); + } + + @Test + void oversizeUtf8() { + Utf8 u = new Utf8(); + u.setByteLength(1024); + assertEquals(1024, u.getByteLength()); + assertThrows(UnsupportedOperationException.class, + () -> u.setByteLength(TestSystemLimitException.MAX_ARRAY_VM_LIMIT + 1)); + + try { + System.setProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY, Long.toString(1000L)); + TestSystemLimitException.resetLimits(); + + Exception ex = assertThrows(SystemLimitException.class, () -> u.setByteLength(1024)); + assertEquals("String length 1024 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY); + TestSystemLimitException.resetLimits(); + } + } + + @Test + void serialization() throws IOException, ClassNotFoundException { try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(bos)) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TimePeriodTest.java b/lang/java/avro/src/test/java/org/apache/avro/util/TimePeriodTest.java new file mode 100644 index 00000000000..cd9809be414 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TimePeriodTest.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.junit.jupiter.api.Test; + +import java.time.DateTimeException; +import java.time.Duration; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.Period; +import java.time.chrono.IsoChronology; +import java.time.chrono.JapaneseChronology; +import java.time.temporal.ChronoUnit; +import java.time.temporal.Temporal; +import java.time.temporal.TemporalAmount; +import java.time.temporal.TemporalUnit; +import java.time.temporal.UnsupportedTemporalTypeException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import static java.time.temporal.ChronoUnit.DAYS; +import static java.time.temporal.ChronoUnit.ERAS; +import static java.time.temporal.ChronoUnit.MICROS; +import static java.time.temporal.ChronoUnit.MILLIS; +import static java.time.temporal.ChronoUnit.MONTHS; +import static java.time.temporal.ChronoUnit.NANOS; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TimePeriodTest { + // This Long is too large to fit into an unsigned int. + private static final long TOO_LARGE = Integer.MAX_VALUE * 3L; + + @Test + void validateConstruction() { + TimePeriod timePeriod = TimePeriod.of(12, 34, 56); + assertSame(timePeriod, TimePeriod.from(timePeriod)); + assertComponents(12, 34, 56, timePeriod); + + assertComponents(14, 3, 0, TimePeriod.from(IsoChronology.INSTANCE.period(1, 2, 3))); + + assertComponents(36_000, 0, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.MILLENNIA, 3))); + assertComponents(3_600, 0, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.CENTURIES, 3))); + assertComponents(360, 0, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.DECADES, 3))); + assertComponents(36, 0, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.YEARS, 3))); + assertComponents(3, 0, 0, TimePeriod.from(TimeAmount.of(MONTHS, 3))); + + assertComponents(0, 21, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.WEEKS, 3))); + assertComponents(0, 3, 0, TimePeriod.from(TimeAmount.of(DAYS, 3))); + assertComponents(0, 2, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.HALF_DAYS, 4))); + assertComponents(0, 2, 43_200_000, TimePeriod.from(TimeAmount.of(ChronoUnit.HALF_DAYS, 5))); + + assertComponents(0, 0, 10_800_000, TimePeriod.from(TimeAmount.of(ChronoUnit.HOURS, 3))); + assertComponents(0, 0, 180_000, TimePeriod.from(TimeAmount.of(ChronoUnit.MINUTES, 3))); + assertComponents(0, 0, 3_000, TimePeriod.from(TimeAmount.of(ChronoUnit.SECONDS, 3))); + assertComponents(0, 0, 3, TimePeriod.from(TimeAmount.of(MILLIS, 3))); + assertComponents(0, 0, 3, TimePeriod.from(TimeAmount.of(MICROS, 3_000))); + assertComponents(0, 0, 3, TimePeriod.from(TimeAmount.of(NANOS, 3_000_000))); + + // Micros and nanos must be a multiple of milliseconds + assertThrows(DateTimeException.class, () -> TimePeriod.from(TimeAmount.of(ChronoUnit.MICROS, 3))); + assertThrows(DateTimeException.class, () -> TimePeriod.from(TimeAmount.of(ChronoUnit.NANOS, 3))); + // Unsupported cases (null, non-ISO chronology, unknown temporal unit, + // non-ChronoUnit) + assertThrows(NullPointerException.class, () -> TimePeriod.from(null)); + assertThrows(DateTimeException.class, () -> TimePeriod.from(JapaneseChronology.INSTANCE.period(1, 2, 3))); + assertThrows(UnsupportedTemporalTypeException.class, () -> TimePeriod.from(TimeAmount.of(ChronoUnit.ERAS, 1))); + assertThrows(UnsupportedTemporalTypeException.class, () -> TimePeriod.from(TimeAmount.of(DummyUnit.INSTANCE, 3))); + // Arguments are long, but must fit an unsigned long + assertThrows(ArithmeticException.class, () -> TimePeriod.of(TOO_LARGE, 0, 0)); + assertThrows(ArithmeticException.class, () -> TimePeriod.of(0, TOO_LARGE, 0)); + assertThrows(ArithmeticException.class, () -> TimePeriod.of(0, 0, TOO_LARGE)); + + // Odd one out: querying an unsupported temporal unit + // (assertComponents handles all valid cases) + assertThrows(UnsupportedTemporalTypeException.class, () -> TimePeriod.of(1, 1, 1).get(ERAS)); + } + + @Test + void checkConversionsFromJavaTime() { + assertEquals(TimePeriod.of(12, 0, 0), TimePeriod.from(Period.ofYears(1))); + assertEquals(TimePeriod.of(2, 0, 0), TimePeriod.from(Period.ofMonths(2))); + assertEquals(TimePeriod.of(0, 21, 0), TimePeriod.from(Period.ofWeeks(3))); + assertEquals(TimePeriod.of(0, 4, 0), TimePeriod.from(Period.ofDays(4))); + + assertEquals(TimePeriod.of(0, 0, 1), TimePeriod.from(Duration.ofNanos(1_000_000))); + assertEquals(TimePeriod.of(0, 0, 2), TimePeriod.from(Duration.ofMillis(2))); + assertEquals(TimePeriod.of(0, 0, 3_000), TimePeriod.from(Duration.ofSeconds(3))); + assertEquals(TimePeriod.of(0, 0, 240000), TimePeriod.from(Duration.ofMinutes(4))); + assertEquals(TimePeriod.of(0, 0, 18000000), TimePeriod.from(Duration.ofHours(5))); + // Duration never takes into account things like daylight saving + assertEquals(TimePeriod.of(0, 0, 518400000), TimePeriod.from(Duration.ofDays(6))); + } + + @Test + void checkConversionsToJavaTime() { + TimePeriod months = TimePeriod.of(1, 0, 0); + TimePeriod days = TimePeriod.of(0, 2, 0); + TimePeriod time = TimePeriod.of(0, 0, 3); + TimePeriod all = TimePeriod.of(1, 2, 3); + + assertTrue(months.isDateBased()); + assertTrue(days.isDateBased()); + assertFalse(all.isDateBased()); + assertFalse(time.isDateBased()); + + assertEquals(Period.of(0, 1, 0), months.toPeriod()); + assertEquals(Period.of(0, 0, 2), days.toPeriod()); + assertThrows(DateTimeException.class, all::toPeriod); + assertThrows(DateTimeException.class, time::toPeriod); + + assertThrows(DateTimeException.class, () -> TimePeriod.of(0, Integer.MAX_VALUE * 2L, 0).toPeriod()); + + assertFalse(months.isTimeBased()); + assertFalse(days.isTimeBased()); + assertFalse(all.isTimeBased()); + assertTrue(time.isTimeBased()); + + assertThrows(DateTimeException.class, months::toDuration); + // Note: though Duration supports this, it uses a fixed 86400 seconds + assertEquals(Duration.ofSeconds(172800), days.toDuration()); + assertThrows(DateTimeException.class, all::toDuration); + assertEquals(Duration.ofMillis(3), time.toDuration()); + } + + @Test + void checkAddingToTemporalItems() { + TimePeriod monthAndTwoDays = TimePeriod.of(1, 2, 0); + TimePeriod threeMillis = TimePeriod.of(0, 0, 3); + TimePeriod complexTimePeriod = TimePeriod.of(1, 2, 3); + + LocalDateTime localDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 7_000_000); + LocalDate localDate = LocalDate.of(2001, 2, 3); + LocalTime localTime = LocalTime.of(4, 5, 6, 7_000_000); + + assertEquals(localDateTime.plusMonths(1).plusDays(2), localDateTime.plus(monthAndTwoDays)); + assertEquals(localDateTime.plus(3, MILLIS), localDateTime.plus(threeMillis)); + assertEquals(localDateTime.plusMonths(1).plusDays(2).plus(3, MILLIS), localDateTime.plus(complexTimePeriod)); + + assertEquals(localDate.plusMonths(1).plusDays(2), localDate.plus(monthAndTwoDays)); + + assertEquals(localTime.plus(3, MILLIS), localTime.plus(threeMillis)); + + assertEquals(localDateTime.minusMonths(1).minusDays(2), localDateTime.minus(monthAndTwoDays)); + assertEquals(localDateTime.minus(3, MILLIS), localDateTime.minus(threeMillis)); + assertEquals(localDateTime.minusMonths(1).minusDays(2).minus(3, MILLIS), localDateTime.minus(complexTimePeriod)); + + assertEquals(localDate.minusMonths(1).minusDays(2), localDate.minus(monthAndTwoDays)); + + assertEquals(localTime.minus(3, MILLIS), localTime.minus(threeMillis)); + } + + @Test + void checkEqualityTests() { + TimePeriod timePeriod1a = TimePeriod.of(1, 2, 3); + TimePeriod timePeriod1b = TimePeriod.of(1, 2, 3); + TimePeriod timePeriod2 = TimePeriod.of(9, 9, 9); + TimePeriod timePeriod3 = TimePeriod.of(1, 9, 9); + TimePeriod timePeriod4 = TimePeriod.of(1, 2, 9); + + // noinspection EqualsWithItself + assertEquals(timePeriod1a, timePeriod1a); + assertEquals(timePeriod1a, timePeriod1b); + assertEquals(timePeriod1a.hashCode(), timePeriod1b.hashCode()); + + assertNotEquals(timePeriod1a, null); + // noinspection AssertBetweenInconvertibleTypes + assertNotEquals(timePeriod1a, "not equal"); + assertNotEquals(timePeriod1a, timePeriod2); + assertNotEquals(timePeriod1a.hashCode(), timePeriod2.hashCode()); + assertNotEquals(timePeriod1a, timePeriod3); + assertNotEquals(timePeriod1a.hashCode(), timePeriod3.hashCode()); + assertNotEquals(timePeriod1a, timePeriod4); + assertNotEquals(timePeriod1a.hashCode(), timePeriod4.hashCode()); + } + + @Test + void checkStringRepresentation() { + assertEquals("P0", TimePeriod.of(0, 0, 0).toString()); + assertEquals("P1Y", TimePeriod.of(12, 0, 0).toString()); + assertEquals("P2M", TimePeriod.of(2, 0, 0).toString()); + assertEquals("P3", TimePeriod.of(0, 3, 0).toString()); + assertEquals("P1Y2M3", TimePeriod.of(14, 3, 0).toString()); + assertEquals("PT04", TimePeriod.of(0, 0, 14400000).toString()); + assertEquals("PT00:05", TimePeriod.of(0, 0, 300000).toString()); + assertEquals("PT00:00:06", TimePeriod.of(0, 0, 6000).toString()); + assertEquals("PT00:00:00.007", TimePeriod.of(0, 0, 7).toString()); + assertEquals("P1Y2M3T04:05:06.007", TimePeriod.of(14, 3, 14706007).toString()); + + // Days and millis will never overflow to months/days, to respect differences + // in months and days (daylight saving). + assertEquals("P123T1193:02:47.295", TimePeriod.of(0, 123, 4294967295L).toString()); + } + + private void assertComponents(long months, long days, long millis, TimePeriod timePeriod) { + List expectedUnits = new ArrayList<>(Arrays.asList(MONTHS, DAYS, MILLIS)); + if (months == 0) { + expectedUnits.remove(MONTHS); + } + if (days == 0) { + expectedUnits.remove(DAYS); + } + if (millis == 0) { + expectedUnits.remove(MILLIS); + } + assertEquals(expectedUnits, timePeriod.getUnits()); + + assertEquals(months, timePeriod.getMonths()); + assertEquals(months, timePeriod.get(MONTHS)); + assertEquals(days, timePeriod.getDays()); + assertEquals(days, timePeriod.get(DAYS)); + assertEquals(millis, timePeriod.getMillis()); + assertEquals(millis, timePeriod.get(MILLIS)); + } + + private static class TimeAmount implements TemporalAmount { + private final Map amountsPerUnit = new LinkedHashMap<>(); + + static TimeAmount of(TemporalUnit unit, long amount) { + return new TimeAmount().with(unit, amount); + } + + TimeAmount with(TemporalUnit unit, long amount) { + amountsPerUnit.put(unit, amount); + return this; + } + + @Override + public long get(TemporalUnit unit) { + return amountsPerUnit.get(unit); + } + + @Override + public List getUnits() { + return new ArrayList<>(amountsPerUnit.keySet()); + } + + @Override + public Temporal addTo(Temporal temporal) { + throw new UnsupportedOperationException(); + } + + @Override + public Temporal subtractFrom(Temporal temporal) { + throw new UnsupportedOperationException(); + } + } + + private static class DummyUnit implements TemporalUnit { + private static final DummyUnit INSTANCE = new DummyUnit(); + + @Override + public Duration getDuration() { + return null; + } + + @Override + public boolean isDurationEstimated() { + return false; + } + + @Override + public boolean isDateBased() { + return false; + } + + @Override + public boolean isTimeBased() { + return false; + } + + @Override + public R addTo(R temporal, long amount) { + return null; + } + + @Override + public long between(Temporal temporal1Inclusive, Temporal temporal2Exclusive) { + return 0; + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/UtfTextUtilsTest.java b/lang/java/avro/src/test/java/org/apache/avro/util/UtfTextUtilsTest.java new file mode 100644 index 00000000000..6c525e6d39a --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/UtfTextUtilsTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +@SuppressWarnings("SpellCheckingInspection") +class UtfTextUtilsTest { + @Test + void validateCharsetDetectionWithBOM() { + assertEquals("UTF-32", testDetection("0000FEFF").name()); + assertEquals("UTF-32", testDetection("FFFE0000").name()); + assertEquals("UTF-16", testDetection("FEFF0041").name()); + assertEquals("UTF-16", testDetection("FFFE4100").name()); + assertEquals("UTF-8", testDetection("EFBBBF41").name()); + + // Invalid UCS-4 encodings: these we're certain we cannot handle. + assertThrows(IllegalArgumentException.class, () -> testDetection("0000FFFE")); + assertThrows(IllegalArgumentException.class, () -> testDetection("FEFF0000")); + } + + @Test + void validateCharsetDetectionWithoutBOM() { + assertEquals("UTF-32BE", testDetection("00000041").name()); + assertEquals("UTF-32LE", testDetection("41000000").name()); + assertEquals("UTF-16BE", testDetection("00410042").name()); + assertEquals("UTF-16LE", testDetection("41004200").name()); + assertEquals("UTF-8", testDetection("41424344").name()); + + assertEquals("UTF-8", testDetection("414243").name()); + + assertEquals("UTF-16BE", testDetection("0041").name()); + assertEquals("UTF-16LE", testDetection("4100").name()); + assertEquals("UTF-8", testDetection("4142").name()); + + assertEquals("UTF-8", testDetection("41").name()); + + assertEquals("UTF-8", testDetection("").name()); + + // Invalid UCS-4 encodings: these we're fairly certain we cannot handle. + assertThrows(IllegalArgumentException.class, () -> testDetection("00004100")); + assertThrows(IllegalArgumentException.class, () -> testDetection("00410000")); + } + + private Charset testDetection(String hexBytes) { + return UtfTextUtils.detectUtfCharset(hexBytes(hexBytes)); + } + + private static byte[] hexBytes(String hexBytes) { + byte[] bytes = new byte[hexBytes.length() / 2]; + for (int i = 0; i < bytes.length; i++) { + int index = i * 2; + bytes[i] = (byte) Integer.parseUnsignedInt(hexBytes.substring(index, index + 2), 16); + } + return bytes; + } + + @Test + void validateTextConversionFromBytes() { + assertEquals("A", UtfTextUtils.asString(hexBytes("EFBBBF41"), StandardCharsets.UTF_8)); + assertEquals("A", UtfTextUtils.asString(hexBytes("EFBBBF41"), null)); + + assertEquals("A", UtfTextUtils.asString(hexBytes("41"), StandardCharsets.UTF_8)); + assertEquals("A", UtfTextUtils.asString(hexBytes("41"), null)); + } + + @Test + void validateTextConversionFromStreams() throws IOException { + assertEquals("A", + UtfTextUtils.readAllBytes(new ByteArrayInputStream(hexBytes("EFBBBF41")), StandardCharsets.UTF_8)); + assertEquals("A", UtfTextUtils.readAllBytes(new ByteArrayInputStream(hexBytes("EFBBBF41")), null)); + + assertEquals("A", UtfTextUtils.readAllBytes(new ByteArrayInputStream(hexBytes("41")), StandardCharsets.UTF_8)); + assertEquals("A", UtfTextUtils.readAllBytes(new ByteArrayInputStream(hexBytes("41")), null)); + + // Invalid UCS-4 encoding should throw an IOException instead of an + // IllegalArgumentException. + assertThrows(IOException.class, + () -> UtfTextUtils.readAllBytes(new ByteArrayInputStream(hexBytes("0000FFFE")), null)); + } + + @Test + void validateSupportForUnmarkableStreams() throws IOException { + assertEquals("ABCD", + UtfTextUtils.readAllBytes(new UnmarkableInputStream(new ByteArrayInputStream(hexBytes("41424344"))), null)); + } + + private static class UnmarkableInputStream extends FilterInputStream { + public UnmarkableInputStream(InputStream input) { + super(input); + } + + @Override + public synchronized void mark(int ignored) { + } + + @Override + public synchronized void reset() throws IOException { + throw new IOException("mark/reset not supported"); + } + + @Override + public boolean markSupported() { + return false; + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/WeakIdentityHashMapTest.java b/lang/java/avro/src/test/java/org/apache/avro/util/WeakIdentityHashMapTest.java new file mode 100644 index 00000000000..8ecda312032 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/WeakIdentityHashMapTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.avro.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; + +/** + * This test aims to stress WeakIdentityHashMap class in multithread env. + */ +class WeakIdentityHashMapTest { + + private static final int TEST_SIZE = 4001; + + List data = new ArrayList<>(TEST_SIZE); + + final WeakIdentityHashMap map = new WeakIdentityHashMap<>(); + + List exceptions = new ArrayList<>(TEST_SIZE); + + @Test + void stressMap() { + + for (int i = 1; i <= TEST_SIZE; i++) { + data.add("Data_" + i); + } + + List threads = new ArrayList<>(80); + for (int i = 0; i <= 80; i++) { + final int seed = (i + 1) * 100; + Runnable runnable = () -> rundata(seed); + Thread t = new Thread(runnable); + threads.add(t); + } + threads.forEach(Thread::start); + threads.forEach((Thread t) -> { + try { + t.join(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + Assertions.assertTrue(exceptions.isEmpty()); + } + + void rundata(int seed) { + try { + for (int i = 1; i <= TEST_SIZE; i++) { + String keyValue = data.get((i + seed) % TEST_SIZE); + map.put(keyValue, keyValue); + if (i % 200 == 0) { + sleep(); + } + String keyValueRemove = data.get(((i + seed) * 3) % TEST_SIZE); + map.remove(keyValueRemove); + } + } catch (RuntimeException ex) { + exceptions.add(ex); + } + } + + void sleep() { + try { + Thread.sleep(5); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java b/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java new file mode 100644 index 00000000000..c900be9e31b --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util.internal; + +import org.junit.jupiter.api.Test; + +import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.MatcherAssert.assertThat; + +public class TestClassValueCache { + + @Test + void basic() { + ClassValueCache cache = new ClassValueCache<>(Class::toString); + + String fromCache = cache.apply(String.class); + assertThat(fromCache, is("class java.lang.String")); + + assertThat(cache.apply(String.class), sameInstance(fromCache)); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestJacksonUtils.java b/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestJacksonUtils.java index 12ac094d99e..314685f43da 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestJacksonUtils.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestJacksonUtils.java @@ -19,8 +19,7 @@ import static org.apache.avro.util.internal.JacksonUtils.toJsonNode; import static org.apache.avro.util.internal.JacksonUtils.toObject; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.*; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.BigIntegerNode; @@ -40,7 +39,7 @@ import org.apache.avro.JsonProperties; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestJacksonUtils { @@ -49,8 +48,8 @@ enum Direction { } @Test - public void testToJsonNode() { - assertEquals(null, toJsonNode(null)); + void testToJsonNode() { + assertNull(toJsonNode(null)); assertEquals(NullNode.getInstance(), toJsonNode(JsonProperties.NULL_VALUE)); assertEquals(BooleanNode.TRUE, toJsonNode(true)); assertEquals(IntNode.valueOf(1), toJsonNode(1)); @@ -73,8 +72,8 @@ public void testToJsonNode() { } @Test - public void testToObject() { - assertEquals(null, toObject(null)); + void testToObject() { + assertNull(toObject(null)); assertEquals(JsonProperties.NULL_VALUE, toObject(NullNode.getInstance())); assertEquals(true, toObject(BooleanNode.TRUE)); assertEquals(1, toObject(IntNode.valueOf(1))); @@ -101,6 +100,11 @@ public void testToObject() { toObject(NullNode.getInstance(), SchemaBuilder.unionOf().nullType().and().intType().endUnion())); assertEquals("a", toObject(TextNode.valueOf("a"), SchemaBuilder.unionOf().stringType().and().intType().endUnion())); + + assertEquals(1, toObject(IntNode.valueOf(1), SchemaBuilder.unionOf().nullType().and().intType().endUnion())); + assertEquals(42.0, + toObject(DoubleNode.valueOf(42.0), SchemaBuilder.unionOf().intType().and().doubleType().endUnion())); + assertEquals("1", toObject(TextNode.valueOf("1"), SchemaBuilder.unionOf().intType().and().stringType().endUnion())); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/ComparableComparator.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/ComparableComparator.java new file mode 100644 index 00000000000..54c887cc167 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/ComparableComparator.java @@ -0,0 +1,44 @@ +/* + * Copyright 2002-2018 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import java.util.Comparator; + +/** + * Comparator that adapts Comparables to the Comparator interface. Mainly for + * internal use in other Comparators, when supposed to work on Comparables. + * + * @author Keith Donald + * @since 1.2.2 + * @param the type of comparable objects that may be compared by this + * comparator + * @see Comparable + */ +class ComparableComparator> implements Comparator { + + /** + * A shared instance of this default comparator. see Comparators#comparable() + */ + @SuppressWarnings("rawtypes") + public static final ComparableComparator INSTANCE = new ComparableComparator(); + + @Override + public int compare(T o1, T o2) { + return o1.compareTo(o2); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/NullSafeComparator.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/NullSafeComparator.java new file mode 100644 index 00000000000..f621abfe42e --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/NullSafeComparator.java @@ -0,0 +1,132 @@ +/* + * Copyright 2002-2018 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +import java.util.Comparator; + +/** + * A Comparator that will safely compare nulls to be lower or higher than other + * objects. Can decorate a given Comparator or work on Comparables. + * + * @author Keith Donald + * @author Juergen Hoeller + * @since 1.2.2 + * @param the type of objects that may be compared by this comparator + * @see Comparable + */ +class NullSafeComparator implements Comparator { + + /** + * A shared default instance of this comparator, treating nulls lower than + * non-null objects. see Comparators#nullsLow() + */ + @SuppressWarnings("rawtypes") + public static final NullSafeComparator NULLS_LOW = new NullSafeComparator<>(true); + + /** + * A shared default instance of this comparator, treating nulls higher than + * non-null objects. see Comparators#nullsHigh() + */ + @SuppressWarnings("rawtypes") + public static final NullSafeComparator NULLS_HIGH = new NullSafeComparator<>(false); + + private final Comparator nonNullComparator; + + private final boolean nullsLow; + + /** + * Create a NullSafeComparator that sorts {@code null} based on the provided + * flag, working on Comparables. + *

    + * When comparing two non-null objects, their Comparable implementation will be + * used: this means that non-null elements (that this Comparator will be applied + * to) need to implement Comparable. + *

    + * As a convenience, you can use the default shared instances: + * {@code NullSafeComparator.NULLS_LOW} and + * {@code NullSafeComparator.NULLS_HIGH}. + * + * @param nullsLow whether to treat nulls lower or higher than non-null objects + * @see Comparable + * @see #NULLS_LOW + * @see #NULLS_HIGH + */ + @SuppressWarnings("unchecked") + private NullSafeComparator(boolean nullsLow) { + this.nonNullComparator = ComparableComparator.INSTANCE; + this.nullsLow = nullsLow; + } + + /** + * Create a NullSafeComparator that sorts {@code null} based on the provided + * flag, decorating the given Comparator. + *

    + * When comparing two non-null objects, the specified Comparator will be used. + * The given underlying Comparator must be able to handle the elements that this + * Comparator will be applied to. + * + * @param comparator the comparator to use when comparing two non-null objects + * @param nullsLow whether to treat nulls lower or higher than non-null + * objects + */ + public NullSafeComparator(Comparator comparator, boolean nullsLow) { + // Assert.notNull(comparator, "Non-null Comparator is required"); + this.nonNullComparator = comparator; + this.nullsLow = nullsLow; + } + + @Override + public int compare(@Nullable T o1, @Nullable T o2) { + if (o1 == o2) { + return 0; + } + if (o1 == null) { + return (this.nullsLow ? -1 : 1); + } + if (o2 == null) { + return (this.nullsLow ? 1 : -1); + } + return this.nonNullComparator.compare(o1, o2); + } + + @Override + @SuppressWarnings("unchecked") + public boolean equals(@Nullable Object other) { + if (this == other) { + return true; + } + if (!(other instanceof NullSafeComparator)) { + return false; + } + NullSafeComparator otherComp = (NullSafeComparator) other; + return (this.nonNullComparator.equals(otherComp.nonNullComparator) && this.nullsLow == otherComp.nullsLow); + } + + @Override + public int hashCode() { + return this.nonNullComparator.hashCode() * (this.nullsLow ? -1 : 1); + } + + @Override + public String toString() { + return "NullSafeComparator: non-null comparator [" + this.nonNullComparator + "]; " + + (this.nullsLow ? "nulls low" : "nulls high"); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/StopWatch.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/StopWatch.java new file mode 100644 index 00000000000..10131fa309a --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/StopWatch.java @@ -0,0 +1,415 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * Simple stop watch, allowing for timing of a number of tasks, exposing total + * running time and running time for each named task. + * + *

    + * Conceals use of {@link System#nanoTime()}, improving the readability of + * application code and reducing the likelihood of calculation errors. + * + *

    + * Note that this object is not designed to be thread-safe and does not use + * synchronization. + * + *

    + * This class is normally used to verify performance during proof-of-concept + * work and in development, rather than as part of production applications. + * + *

    + * As of Spring Framework 5.2, running time is tracked and reported in + * nanoseconds. + * + * @author Rod Johnson + * @author Juergen Hoeller + * @author Sam Brannen + * @since May 2, 2001 + */ +class StopWatch { + + /** + * Identifier of this {@code StopWatch}. + *

    + * Handy when we have output from multiple stop watches and need to distinguish + * between them in log or console output. + */ + private final String id; + + private boolean keepTaskList = true; + + private final List taskList = new ArrayList<>(1); + + /** Start time of the current task. */ + private long startTimeNanos; + + /** Name of the current task. */ + @Nullable + private String currentTaskName; + + @Nullable + private TaskInfo lastTaskInfo; + + private int taskCount; + + /** Total running time. */ + private long totalTimeNanos; + + /** + * Construct a new {@code StopWatch}. + *

    + * Does not start any task. + */ + public StopWatch() { + this(""); + } + + /** + * Construct a new {@code StopWatch} with the given ID. + *

    + * The ID is handy when we have output from multiple stop watches and need to + * distinguish between them. + *

    + * Does not start any task. + * + * @param id identifier for this stop watch + */ + public StopWatch(String id) { + this.id = id; + } + + /** + * Get the ID of this {@code StopWatch}, as specified on construction. + * + * @return the ID (empty String by default) + * @since 4.2.2 + * @see #StopWatch(String) + */ + public String getId() { + return this.id; + } + + /** + * Configure whether the {@link TaskInfo} array is built over time. + *

    + * Set this to {@code false} when using a {@code StopWatch} for millions of + * intervals; otherwise, the {@code TaskInfo} structure will consume excessive + * memory. + *

    + * Default is {@code true}. + */ + public void setKeepTaskList(boolean keepTaskList) { + this.keepTaskList = keepTaskList; + } + + /** + * Start an unnamed task. + *

    + * The results are undefined if {@link #stop()} or timing methods are called + * without invoking this method first. + * + * @see #start(String) + * @see #stop() + */ + public void start() throws IllegalStateException { + start(""); + } + + /** + * Start a named task. + *

    + * The results are undefined if {@link #stop()} or timing methods are called + * without invoking this method first. + * + * @param taskName the name of the task to start + * @see #start() + * @see #stop() + */ + public void start(String taskName) throws IllegalStateException { + if (this.currentTaskName != null) { + throw new IllegalStateException("Can't start StopWatch: it's already running"); + } + this.currentTaskName = taskName; + this.startTimeNanos = System.nanoTime(); + } + + /** + * Stop the current task. + *

    + * The results are undefined if timing methods are called without invoking at + * least one pair of {@code start()} / {@code stop()} methods. + * + * @see #start() + * @see #start(String) + */ + public void stop() throws IllegalStateException { + if (this.currentTaskName == null) { + throw new IllegalStateException("Can't stop StopWatch: it's not running"); + } + long lastTime = System.nanoTime() - this.startTimeNanos; + this.totalTimeNanos += lastTime; + this.lastTaskInfo = new TaskInfo(this.currentTaskName, lastTime); + if (this.keepTaskList) { + this.taskList.add(this.lastTaskInfo); + } + ++this.taskCount; + this.currentTaskName = null; + } + + /** + * Determine whether this {@code StopWatch} is currently running. + * + * @see #currentTaskName() + */ + public boolean isRunning() { + return (this.currentTaskName != null); + } + + /** + * Get the name of the currently running task, if any. + * + * @since 4.2.2 + * @see #isRunning() + */ + @Nullable + public String currentTaskName() { + return this.currentTaskName; + } + + /** + * Get the time taken by the last task in nanoseconds. + * + * @since 5.2 + * @see #getLastTaskTimeMillis() + */ + public long getLastTaskTimeNanos() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task interval"); + } + return this.lastTaskInfo.getTimeNanos(); + } + + /** + * Get the time taken by the last task in milliseconds. + * + * @see #getLastTaskTimeNanos() + */ + public long getLastTaskTimeMillis() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task interval"); + } + return this.lastTaskInfo.getTimeMillis(); + } + + /** + * Get the name of the last task. + */ + public String getLastTaskName() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task name"); + } + return this.lastTaskInfo.getTaskName(); + } + + /** + * Get the last task as a {@link TaskInfo} object. + */ + public TaskInfo getLastTaskInfo() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task info"); + } + return this.lastTaskInfo; + } + + /** + * Get the total time in nanoseconds for all tasks. + * + * @since 5.2 + * @see #getTotalTimeMillis() + * @see #getTotalTimeSeconds() + */ + public long getTotalTimeNanos() { + return this.totalTimeNanos; + } + + /** + * Get the total time in milliseconds for all tasks. + * + * @see #getTotalTimeNanos() + * @see #getTotalTimeSeconds() + */ + public long getTotalTimeMillis() { + return nanosToMillis(this.totalTimeNanos); + } + + /** + * Get the total time in seconds for all tasks. + * + * @see #getTotalTimeNanos() + * @see #getTotalTimeMillis() + */ + public double getTotalTimeSeconds() { + return nanosToSeconds(this.totalTimeNanos); + } + + /** + * Get the number of tasks timed. + */ + public int getTaskCount() { + return this.taskCount; + } + + /** + * Get an array of the data for tasks performed. + */ + public TaskInfo[] getTaskInfo() { + if (!this.keepTaskList) { + throw new UnsupportedOperationException("Task info is not being kept!"); + } + return this.taskList.toArray(new TaskInfo[0]); + } + + /** + * Get a short description of the total running time. + */ + public String shortSummary() { + return "StopWatch '" + getId() + "': running time = " + getTotalTimeNanos() + " ns"; + } + + /** + * Generate a string with a table describing all tasks performed. + *

    + * For custom reporting, call {@link #getTaskInfo()} and use the task info + * directly. + */ + public String prettyPrint() { + StringBuilder sb = new StringBuilder(shortSummary()); + sb.append('\n'); + if (!this.keepTaskList) { + sb.append("No task info kept"); + } else { + sb.append("---------------------------------------------\n"); + sb.append("ns % Task name\n"); + sb.append("---------------------------------------------\n"); + NumberFormat nf = NumberFormat.getNumberInstance(); + nf.setMinimumIntegerDigits(9); + nf.setGroupingUsed(false); + NumberFormat pf = NumberFormat.getPercentInstance(); + pf.setMinimumIntegerDigits(3); + pf.setGroupingUsed(false); + for (TaskInfo task : getTaskInfo()) { + sb.append(nf.format(task.getTimeNanos())).append(" "); + sb.append(pf.format((double) task.getTimeNanos() / getTotalTimeNanos())).append(" "); + sb.append(task.getTaskName()).append('\n'); + } + } + return sb.toString(); + } + + /** + * Generate an informative string describing all tasks performed + *

    + * For custom reporting, call {@link #getTaskInfo()} and use the task info + * directly. + */ + @Override + public String toString() { + StringBuilder sb = new StringBuilder(shortSummary()); + if (this.keepTaskList) { + for (TaskInfo task : getTaskInfo()) { + sb.append("; [").append(task.getTaskName()).append("] took ").append(task.getTimeNanos()).append(" ns"); + long percent = Math.round(100.0 * task.getTimeNanos() / getTotalTimeNanos()); + sb.append(" = ").append(percent).append('%'); + } + } else { + sb.append("; no task info kept"); + } + return sb.toString(); + } + + private static long nanosToMillis(long duration) { + return TimeUnit.NANOSECONDS.toMillis(duration); + } + + private static double nanosToSeconds(long duration) { + return duration / 1_000_000_000.0; + } + + /** + * Nested class to hold data about one task executed within the + * {@code StopWatch}. + */ + public static final class TaskInfo { + + private final String taskName; + + private final long timeNanos; + + TaskInfo(String taskName, long timeNanos) { + this.taskName = taskName; + this.timeNanos = timeNanos; + } + + /** + * Get the name of this task. + */ + public String getTaskName() { + return this.taskName; + } + + /** + * Get the time in nanoseconds this task took. + * + * @since 5.2 + * @see #getTimeMillis() + * @see #getTimeSeconds() + */ + public long getTimeNanos() { + return this.timeNanos; + } + + /** + * Get the time in milliseconds this task took. + * + * @see #getTimeNanos() + * @see #getTimeSeconds() + */ + public long getTimeMillis() { + return nanosToMillis(this.timeNanos); + } + + /** + * Get the time in seconds this task took. + * + * @see #getTimeMillis() + * @see #getTimeNanos() + */ + public double getTimeSeconds() { + return nanosToSeconds(this.timeNanos); + } + + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/TestConcurrentReferenceHashMap.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/TestConcurrentReferenceHashMap.java new file mode 100644 index 00000000000..c351768864b --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/TestConcurrentReferenceHashMap.java @@ -0,0 +1,688 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap.Entry; +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap.Reference; +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap.Restructure; + +import java.lang.ref.WeakReference; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.WeakHashMap; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link ConcurrentReferenceHashMap}. + * + * @author Phillip Webb + * @author Juergen Hoeller + */ +class TestConcurrentReferenceHashMap { + + private static final Comparator NULL_SAFE_STRING_SORT = new NullSafeComparator<>( + new ComparableComparator(), true); + + private TestWeakConcurrentCache map = new TestWeakConcurrentCache<>(); + + @Test + void shouldCreateWithDefaults() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(); + assertThat(map.getSegmentsSize(), equalTo(16)); + assertThat(map.getSegment(0).getSize(), equalTo(1)); + assertThat(map.getLoadFactor(), equalTo(0.75f)); + } + + @Test + void shouldCreateWithInitialCapacity() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(32); + assertThat(map.getSegmentsSize(), equalTo(16)); + assertThat(map.getSegment(0).getSize(), equalTo(2)); + assertThat(map.getLoadFactor(), equalTo(0.75f)); + } + + @Test + void shouldCreateWithInitialCapacityAndLoadFactor() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(32, 0.5f); + assertThat(map.getSegmentsSize(), equalTo(16)); + assertThat(map.getSegment(0).getSize(), equalTo(2)); + assertThat(map.getLoadFactor(), equalTo(0.5f)); + } + + @Test + void shouldCreateWithInitialCapacityAndConcurrentLevel() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(16, 2); + assertThat(map.getSegmentsSize(), equalTo(2)); + assertThat(map.getSegment(0).getSize(), equalTo(8)); + assertThat(map.getLoadFactor(), equalTo(0.75f)); + } + + @Test + void shouldCreateFullyCustom() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(5, 0.5f, 3); + // concurrencyLevel of 3 ends up as 4 (nearest power of 2) + assertThat(map.getSegmentsSize(), equalTo(4)); + // initialCapacity is 5/4 (rounded up, to nearest power of 2) + assertThat(map.getSegment(0).getSize(), equalTo(2)); + assertThat(map.getLoadFactor(), equalTo(0.5f)); + } + + @Test + void shouldNeedNonNegativeInitialCapacity() { + new ConcurrentReferenceHashMap(0, 1); + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> new TestWeakConcurrentCache(-1, 1)); + assertTrue(e.getMessage().contains("Initial capacity must not be negative")); + } + + @Test + void shouldNeedPositiveLoadFactor() { + new ConcurrentReferenceHashMap(0, 0.1f, 1); + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> new TestWeakConcurrentCache(0, 0.0f, 1)); + assertTrue(e.getMessage().contains("Load factor must be positive")); + } + + @Test + void shouldNeedPositiveConcurrencyLevel() { + new ConcurrentReferenceHashMap(1, 1); + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> new TestWeakConcurrentCache(1, 0)); + assertTrue(e.getMessage().contains("Concurrency level must be positive")); + } + + @Test + void shouldPutAndGet() { + // NOTE we are using mock references so we don't need to worry about GC + assertEquals(0, this.map.size()); + this.map.put(123, "123"); + assertThat(this.map.get(123), equalTo("123")); + assertEquals(1, this.map.size()); + this.map.put(123, "123b"); + assertEquals(1, this.map.size()); + this.map.put(123, null); + assertEquals(1, this.map.size()); + } + + @Test + void shouldReplaceOnDoublePut() { + this.map.put(123, "321"); + this.map.put(123, "123"); + assertThat(this.map.get(123), equalTo("123")); + } + + @Test + void shouldPutNullKey() { + assertNull(this.map.get(null)); + assertThat(this.map.getOrDefault(null, "456"), equalTo("456")); + this.map.put(null, "123"); + assertThat(this.map.get(null), equalTo("123")); + assertThat(this.map.getOrDefault(null, "456"), equalTo("123")); + } + + @Test + void shouldPutNullValue() { + assertNull(this.map.get(123)); + assertThat(this.map.getOrDefault(123, "456"), equalTo("456")); + this.map.put(123, "321"); + assertThat(this.map.get(123), equalTo("321")); + assertThat(this.map.getOrDefault(123, "456"), equalTo("321")); + this.map.put(123, null); + assertNull(this.map.get(123)); + assertNull(this.map.getOrDefault(123, "456")); + } + + @Test + void shouldGetWithNoItems() { + assertNull(this.map.get(123)); + } + + @Test + void shouldApplySupplementalHash() { + Integer key = 123; + this.map.put(key, "123"); + assertNotEquals(this.map.getSupplementalHash(), key.hashCode()); + assertNotEquals(this.map.getSupplementalHash() >> 30 & 0xFF, 0); + } + + @Test + void shouldGetFollowingNexts() { + // Use loadFactor to disable resize + this.map = new TestWeakConcurrentCache<>(1, 10.0f, 1); + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + assertThat(this.map.getSegment(0).getSize(), equalTo(1)); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + assertThat(this.map.get(3), equalTo("3")); + assertNull(this.map.get(4)); + } + + @Test + void shouldResize() { + this.map = new TestWeakConcurrentCache<>(1, 0.75f, 1); + this.map.put(1, "1"); + assertThat(this.map.getSegment(0).getSize(), equalTo(1)); + assertThat(this.map.get(1), equalTo("1")); + + this.map.put(2, "2"); + assertThat(this.map.getSegment(0).getSize(), equalTo(2)); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + + this.map.put(3, "3"); + assertThat(this.map.getSegment(0).getSize(), equalTo(4)); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + assertThat(this.map.get(3), equalTo("3")); + + this.map.put(4, "4"); + assertThat(this.map.getSegment(0).getSize(), equalTo(8)); + assertThat(this.map.get(4), equalTo("4")); + + // Putting again should not increase the count + for (int i = 1; i <= 5; i++) { + this.map.put(i, String.valueOf(i)); + } + assertThat(this.map.getSegment(0).getSize(), equalTo(8)); + assertThat(this.map.get(5), equalTo("5")); + } + + @Test + void shouldPurgeOnGet() { + this.map = new TestWeakConcurrentCache<>(1, 0.75f, 1); + for (int i = 1; i <= 5; i++) { + this.map.put(i, String.valueOf(i)); + } + this.map.getMockReference(1, Restructure.NEVER).queueForPurge(); + this.map.getMockReference(3, Restructure.NEVER).queueForPurge(); + assertNull(this.map.getReference(1, Restructure.WHEN_NECESSARY)); + assertThat(this.map.get(2), equalTo("2")); + assertNull(this.map.getReference(3, Restructure.WHEN_NECESSARY)); + assertThat(this.map.get(4), equalTo("4")); + assertThat(this.map.get(5), equalTo("5")); + } + + @Test + void shouldPurgeOnPut() { + this.map = new TestWeakConcurrentCache<>(1, 0.75f, 1); + for (int i = 1; i <= 5; i++) { + this.map.put(i, String.valueOf(i)); + } + this.map.getMockReference(1, Restructure.NEVER).queueForPurge(); + this.map.getMockReference(3, Restructure.NEVER).queueForPurge(); + this.map.put(1, "1"); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + assertNull(this.map.getReference(3, Restructure.WHEN_NECESSARY)); + assertThat(this.map.get(4), equalTo("4")); + assertThat(this.map.get(5), equalTo("5")); + } + + @Test + void shouldPutIfAbsent() { + assertNull(this.map.putIfAbsent(123, "123")); + assertThat(this.map.putIfAbsent(123, "123b"), equalTo("123")); + assertThat(this.map.get(123), equalTo("123")); + } + + @Test + void shouldPutIfAbsentWithNullValue() { + assertNull(this.map.putIfAbsent(123, null)); + assertNull(this.map.putIfAbsent(123, "123")); + assertNull(this.map.get(123)); + } + + @Test + void shouldPutIfAbsentWithNullKey() { + assertNull(this.map.putIfAbsent(null, "123")); + assertThat(this.map.putIfAbsent(null, "123b"), equalTo("123")); + assertThat(this.map.get(null), equalTo("123")); + } + + @Test + void shouldRemoveKeyAndValue() { + this.map.put(123, "123"); + assertFalse(this.map.remove(123, "456")); + assertThat(this.map.get(123), equalTo("123")); + assertTrue(this.map.remove(123, "123")); + assertFalse(this.map.containsKey(123)); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldRemoveKeyAndValueWithExistingNull() { + this.map.put(123, null); + assertFalse(this.map.remove(123, "456")); + assertNull(this.map.get(123)); + assertTrue(this.map.remove(123, null)); + assertFalse(this.map.containsKey(123)); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldReplaceOldValueWithNewValue() { + this.map.put(123, "123"); + assertFalse(this.map.replace(123, "456", "789")); + assertThat(this.map.get(123), equalTo("123")); + assertTrue(this.map.replace(123, "123", "789")); + assertThat(this.map.get(123), equalTo("789")); + } + + @Test + void shouldReplaceOldNullValueWithNewValue() { + this.map.put(123, null); + assertFalse(this.map.replace(123, "456", "789")); + assertNull(this.map.get(123)); + assertTrue(this.map.replace(123, null, "789")); + assertThat(this.map.get(123), equalTo("789")); + } + + @Test + void shouldReplaceValue() { + this.map.put(123, "123"); + assertThat(this.map.replace(123, "456"), equalTo("123")); + assertThat(this.map.get(123), equalTo("456")); + } + + @Test + void shouldReplaceNullValue() { + this.map.put(123, null); + assertNull(this.map.replace(123, "456")); + assertThat(this.map.get(123), equalTo("456")); + } + + @Test + void shouldGetSize() { + assertEquals(0, this.map.size()); + this.map.put(123, "123"); + this.map.put(123, null); + this.map.put(456, "456"); + assertEquals(2, this.map.size()); + } + + @Test + void shouldSupportIsEmpty() { + assertTrue(this.map.isEmpty()); + this.map.put(123, "123"); + this.map.put(123, null); + this.map.put(456, "456"); + assertFalse(this.map.isEmpty()); + } + + @Test + void shouldContainKey() { + assertFalse(this.map.containsKey(123)); + assertFalse(this.map.containsKey(456)); + this.map.put(123, "123"); + this.map.put(456, null); + assertTrue(this.map.containsKey(123)); + assertTrue(this.map.containsKey(456)); + } + + @Test + void shouldContainValue() { + assertFalse(this.map.containsValue("123")); + assertFalse(this.map.containsValue(null)); + this.map.put(123, "123"); + this.map.put(456, null); + assertTrue(this.map.containsValue("123")); + assertTrue(this.map.containsValue(null)); + } + + @Test + void shouldRemoveWhenKeyIsInMap() { + this.map.put(123, null); + this.map.put(456, "456"); + this.map.put(null, "789"); + assertNull(this.map.remove(123)); + assertThat(this.map.remove(456), equalTo("456")); + assertThat(this.map.remove(null), equalTo("789")); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldRemoveWhenKeyIsNotInMap() { + assertNull(this.map.remove(123)); + assertNull(this.map.remove(null)); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldPutAll() { + Map m = new HashMap<>(); + m.put(123, "123"); + m.put(456, null); + m.put(null, "789"); + this.map.putAll(m); + assertEquals(3, this.map.size()); + assertThat(this.map.get(123), equalTo("123")); + assertNull(this.map.get(456)); + assertThat(this.map.get(null), equalTo("789")); + } + + @Test + void shouldClear() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + this.map.clear(); + assertEquals(0, this.map.size()); + assertFalse(this.map.containsKey(123)); + assertFalse(this.map.containsKey(456)); + assertFalse(this.map.containsKey(null)); + } + + @Test + void shouldGetKeySet() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + Set expected = new HashSet<>(); + expected.add(123); + expected.add(456); + expected.add(null); + assertThat(this.map.keySet(), equalTo(expected)); + } + + @Test + void shouldGetValues() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + List actual = new ArrayList<>(this.map.values()); + List expected = new ArrayList<>(); + expected.add("123"); + expected.add(null); + expected.add("789"); + actual.sort(NULL_SAFE_STRING_SORT); + expected.sort(NULL_SAFE_STRING_SORT); + assertThat(actual, equalTo(expected)); + } + + @Test + void shouldGetEntrySet() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + HashMap expected = new HashMap<>(); + expected.put(123, "123"); + expected.put(456, null); + expected.put(null, "789"); + assertThat(this.map.entrySet(), equalTo(expected.entrySet())); + } + + @Test + void shouldGetEntrySetFollowingNext() { + // Use loadFactor to disable resize + this.map = new TestWeakConcurrentCache<>(1, 10.0f, 1); + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + HashMap expected = new HashMap<>(); + expected.put(1, "1"); + expected.put(2, "2"); + expected.put(3, "3"); + assertThat(this.map.entrySet(), equalTo(expected.entrySet())); + } + + @Test + void shouldRemoveViaEntrySet() { + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + Iterator> iterator = this.map.entrySet().iterator(); + iterator.next(); + iterator.next(); + iterator.remove(); + assertThrows(IllegalStateException.class, iterator::remove); + iterator.next(); + assertFalse(iterator.hasNext()); + assertEquals(2, this.map.size()); + assertFalse(this.map.containsKey(2)); + } + + @Test + void shouldSetViaEntrySet() { + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + Iterator> iterator = this.map.entrySet().iterator(); + iterator.next(); + iterator.next().setValue("2b"); + iterator.next(); + assertFalse(iterator.hasNext()); + assertEquals(3, this.map.size()); + assertThat(this.map.get(2), equalTo("2b")); + } + + @Test + void containsViaEntrySet() { + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + Set> entrySet = this.map.entrySet(); + Set> copy = new HashMap<>(this.map).entrySet(); + copy.forEach(entry -> assertTrue(entrySet.contains(entry))); + this.map.put(1, "A"); + this.map.put(2, "B"); + this.map.put(3, "C"); + copy.forEach(entry -> assertFalse(entrySet.contains(entry))); + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + copy.forEach(entry -> assertTrue(entrySet.contains(entry))); + entrySet.clear(); + copy.forEach(entry -> assertFalse(entrySet.contains(entry))); + } + + @Test + @Disabled("Intended for use during development only") + void shouldBeFasterThanSynchronizedMap() throws InterruptedException { + Map> synchronizedMap = Collections + .synchronizedMap(new WeakHashMap>()); + StopWatch mapTime = timeMultiThreaded("SynchronizedMap", synchronizedMap, + v -> new WeakReference<>(String.valueOf(v))); + System.out.println(mapTime.prettyPrint()); + + this.map.setDisableTestHooks(true); + StopWatch cacheTime = timeMultiThreaded("WeakConcurrentCache", this.map, String::valueOf); + System.out.println(cacheTime.prettyPrint()); + + // We should be at least 4 time faster + assertTrue(cacheTime.getTotalTimeSeconds() < (mapTime.getTotalTimeSeconds() / 4.0)); + } + + @Test + void shouldSupportNullReference() { + // GC could happen during restructure so we must be able to create a reference + // for a null entry + map.createReferenceManager().createReference(null, 1234, null); + } + + /** + * Time a multi-threaded access to a cache. + * + * @return the timing stopwatch + */ + private StopWatch timeMultiThreaded(String id, final Map map, ValueFactory factory) + throws InterruptedException { + + StopWatch stopWatch = new StopWatch(id); + for (int i = 0; i < 500; i++) { + map.put(i, factory.newValue(i)); + } + Thread[] threads = new Thread[30]; + stopWatch.start("Running threads"); + for (int threadIndex = 0; threadIndex < threads.length; threadIndex++) { + threads[threadIndex] = new Thread("Cache access thread " + threadIndex) { + @Override + public void run() { + for (int j = 0; j < 1000; j++) { + for (int i = 0; i < 1000; i++) { + map.get(i); + } + } + } + }; + } + for (Thread thread : threads) { + thread.start(); + } + + for (Thread thread : threads) { + if (thread.isAlive()) { + thread.join(2000); + } + } + stopWatch.stop(); + return stopWatch; + } + + private interface ValueFactory { + + V newValue(int k); + } + + private static class TestWeakConcurrentCache extends ConcurrentReferenceHashMap { + + private int supplementalHash; + + private final LinkedList> queue = new LinkedList<>(); + + private boolean disableTestHooks; + + public TestWeakConcurrentCache() { + super(); + } + + public void setDisableTestHooks(boolean disableTestHooks) { + this.disableTestHooks = disableTestHooks; + } + + public TestWeakConcurrentCache(int initialCapacity, float loadFactor, int concurrencyLevel) { + super(initialCapacity, loadFactor, concurrencyLevel); + } + + public TestWeakConcurrentCache(int initialCapacity, int concurrencyLevel) { + super(initialCapacity, concurrencyLevel); + } + + @Override + protected int getHash(@Nullable Object o) { + if (this.disableTestHooks) { + return super.getHash(o); + } + // For testing we want more control of the hash + this.supplementalHash = super.getHash(o); + return (o != null ? o.hashCode() : 0); + } + + public int getSupplementalHash() { + return this.supplementalHash; + } + + @Override + protected ReferenceManager createReferenceManager() { + return new ReferenceManager() { + @Override + public Reference createReference(Entry entry, int hash, @Nullable Reference next) { + if (TestWeakConcurrentCache.this.disableTestHooks) { + return super.createReference(entry, hash, next); + } + return new MockReference<>(entry, hash, next, TestWeakConcurrentCache.this.queue); + } + + @Override + public Reference pollForPurge() { + if (TestWeakConcurrentCache.this.disableTestHooks) { + return super.pollForPurge(); + } + return TestWeakConcurrentCache.this.queue.isEmpty() ? null : TestWeakConcurrentCache.this.queue.removeFirst(); + } + }; + } + + public MockReference getMockReference(K key, Restructure restructure) { + return (MockReference) super.getReference(key, restructure); + } + } + + private static class MockReference implements Reference { + + private final int hash; + + private Entry entry; + + private final Reference next; + + private final LinkedList> queue; + + public MockReference(Entry entry, int hash, Reference next, LinkedList> queue) { + this.hash = hash; + this.entry = entry; + this.next = next; + this.queue = queue; + } + + @Override + public Entry get() { + return this.entry; + } + + @Override + public int getHash() { + return this.hash; + } + + @Override + public Reference getNext() { + return this.next; + } + + @Override + public void release() { + this.queue.add(this); + this.entry = null; + } + + public void queueForPurge() { + this.queue.add(this); + } + } + +} diff --git a/lang/rust/.activate.sh b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion similarity index 51% rename from lang/rust/.activate.sh rename to lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion index 9ff85b2fa8b..890ba764260 100644 --- a/lang/rust/.activate.sh +++ b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion @@ -1,5 +1,3 @@ -#!/bin/bash - # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -8,13 +6,12 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -make install-hooks +org.apache.avro.CustomTypeConverter diff --git a/lang/rust/rustfmt.toml b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.FormattedSchemaParser similarity index 50% rename from lang/rust/rustfmt.toml rename to lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.FormattedSchemaParser index 7269b105a72..b2db6ddb269 100644 --- a/lang/rust/rustfmt.toml +++ b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.FormattedSchemaParser @@ -1,3 +1,4 @@ +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -6,14 +7,12 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -edition = "2018" -merge_imports = true +org.apache.avro.DummySchemaParser diff --git a/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory new file mode 100644 index 00000000000..b55c233ae46 --- /dev/null +++ b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.avro.CustomTypeLogicalTypeFactory diff --git a/lang/java/avro/src/test/resources/TestRecordWithLogicalTypes.avsc b/lang/java/avro/src/test/resources/TestRecordWithLogicalTypes.avsc index f5d212917f4..5f5e870f9c7 100644 --- a/lang/java/avro/src/test/resources/TestRecordWithLogicalTypes.avsc +++ b/lang/java/avro/src/test/resources/TestRecordWithLogicalTypes.avsc @@ -40,6 +40,12 @@ "type" : "long", "logicalType" : "timestamp-millis" } - } ] + }, { + "name" : "bd", + "type" : { + "type" : "bytes", + "logicalType" : "big-decimal" + } + } ] } diff --git a/lang/java/avro/src/test/resources/TestRecordWithMapsAndArrays.avsc b/lang/java/avro/src/test/resources/TestRecordWithMapsAndArrays.avsc new file mode 100644 index 00000000000..d19c0d8dfb9 --- /dev/null +++ b/lang/java/avro/src/test/resources/TestRecordWithMapsAndArrays.avsc @@ -0,0 +1,47 @@ +{ + "type": "record", + "name": "TestRecordWithMapsAndArrays", + "namespace": "org.apache.avro.specific", + "fields": [ + { + "name": "arr", + "type": { + "type": "array", + "items": "string", + "default": [] + } + }, + { + "name": "map", + "type": { + "type": "map", + "values": "long", + "default": {} + } + }, + { + "name": "nested_arr", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "string", + "default": [] + }, + "default": [] + } + }, + { + "name": "nested_map", + "type": { + "type": "map", + "values": { + "type": "map", + "values": "long", + "default": {} + }, + "default": {} + } + } + ] +} diff --git a/lang/java/avro/src/test/resources/TestUnionRecord.avsc b/lang/java/avro/src/test/resources/TestUnionRecord.avsc new file mode 100644 index 00000000000..36241c8b601 --- /dev/null +++ b/lang/java/avro/src/test/resources/TestUnionRecord.avsc @@ -0,0 +1,23 @@ +[ + "null", + { + "namespace": "org.apache.avro.specific", + "type": "record", + "name": "TestUnionRecord", + "fields": [ + { + "name": "amount", + "type": [ + "null", + { + "type": "bytes", + "logicalType": "decimal", + "precision": 31, + "scale": 8 + } + ], + "default": null + } + ] + } +] diff --git a/lang/java/avro/src/test/resources/multipleFile/ApplicationEvent.avsc b/lang/java/avro/src/test/resources/multipleFile/ApplicationEvent.avsc new file mode 100644 index 00000000000..efc7fbf6139 --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/ApplicationEvent.avsc @@ -0,0 +1,44 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "ApplicationEvent", + "fields": [ + { + "name": "applicationId", + "type": "string", + "doc": "Application ID" + }, + { + "name": "status", + "type": "string", + "doc": "Application Status" + }, + { + "name": "documents", + "type": ["null", { + "type": "array", + "items": "model.DocumentInfo" + }], + "doc": "", + "default": null + }, + { + "name": "response", + "type": { + "namespace": "model", + "type": "record", + "doc": "", + "name": "MyResponse", + "fields": [ + { + "name": "isSuccessful", + "type": "boolean", + "doc": "Indicator for successful or unsuccessful call" + } + ] + } + } + ] + +} diff --git a/lang/java/avro/src/test/resources/multipleFile/DocumentInfo.avsc b/lang/java/avro/src/test/resources/multipleFile/DocumentInfo.avsc new file mode 100644 index 00000000000..95dd4243ea6 --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/DocumentInfo.avsc @@ -0,0 +1,19 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "DocumentInfo", + "fields": [ + { + "name": "documentId", + "type": "string", + "doc": "Document ID" + }, + { + "name": "filePath", + "type": "string", + "doc": "Document Path" + } + ] + +} diff --git a/lang/java/avro/src/test/resources/multipleFile/MyResponse.avsc b/lang/java/avro/src/test/resources/multipleFile/MyResponse.avsc new file mode 100644 index 00000000000..ac6d08291d9 --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/MyResponse.avsc @@ -0,0 +1,14 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "MyResponse", + "fields": [ + { + "name": "isSuccessful", + "type": "boolean", + "doc": "Indicator for successful or unsuccessful call" + } + ] + +} diff --git a/lang/java/avro/src/test/resources/multipleFile/README.md b/lang/java/avro/src/test/resources/multipleFile/README.md new file mode 100644 index 00000000000..fe3541b660e --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/README.md @@ -0,0 +1,8 @@ +## test for parsing multiple files. +This folder aims to test `public List Schema.parse(Iterable sources) throws IOException` method. + +The objective is to check that a record schema define in a file can be use in another record schema as a field type. +Here, ApplicationEvent.avsc file contains a field of type DocumentInfo, defined in file DocumentInfo.avsc. + +The is written at TestSchema.testParseMultipleFile. + diff --git a/lang/java/build.sh b/lang/java/build.sh index 96fdb3489cb..5020a6ed0ca 100755 --- a/lang/java/build.sh +++ b/lang/java/build.sh @@ -16,6 +16,7 @@ # limitations under the License. set -e +set -x usage() { echo "Usage: $0 {lint|test|dist|clean}" @@ -31,15 +32,17 @@ main() { mvn -B spotless:apply ;; test) - mvn -B test + mvn -B verify # Test the modules that depend on hadoop using Hadoop 2 - mvn -B test -Phadoop2 + mvn -Dmaven.build.cache.enabled=false -B test -Phadoop2 ;; dist) mvn -P dist package -DskipTests javadoc:aggregate ;; clean) mvn clean + # Remove spotless P2 cache: it contains absolute paths (failing if running both in and out of docker) + rm -rf ~/.m2/repository/dev/equo/p2-data/queries ;; *) usage diff --git a/lang/java/compiler/pom.xml b/lang/java/compiler/pom.xml index 863af78181b..be3163cac0a 100644 --- a/lang/java/compiler/pom.xml +++ b/lang/java/compiler/pom.xml @@ -1,11 +1,11 @@ - org.codehaus.mojo + org.javacc.plugin javacc-maven-plugin @@ -133,11 +133,12 @@ test -classpath - + org.apache.avro.compiler.specific.SchemaTask ${project.basedir}/src/test/resources/full_record_v1.avsc ${project.basedir}/src/test/resources/full_record_v2.avsc - ${project.basedir}/target/generated-test-sources + ${project.basedir}/src/test/resources/regression_error_field_in_record.avsc + ${project.basedir}/target/generated-test-sources/javacc @@ -149,10 +150,8 @@ add-test-source generate-test-sources @@ -161,16 +160,14 @@ - ${project.basedir}/target/generated-test-sources + ${project.basedir}/target/generated-test-sources/javacc add-source generate-sources @@ -186,37 +183,8 @@ - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - org.codehaus.mojo - exec-maven-plugin - [1.0,) - - exec - - - - - - - - - - - - - ${project.groupId} @@ -225,8 +193,8 @@ org.apache.commons - commons-lang3 - ${commons-lang.version} + commons-text + ${commons-text.version} org.apache.velocity @@ -245,4 +213,42 @@ + + + m2e + + m2e.version + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.codehaus.mojo + exec-maven-plugin + [1.0,) + + exec + + + + + + + + + + + + + + + diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/DocCommentHelper.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/DocCommentHelper.java new file mode 100644 index 00000000000..5d0ec5218dd --- /dev/null +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/DocCommentHelper.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.compiler.idl; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Utility class with {@code ThreadLocal} fields that allow the generated + * classes {@link Idl} and {@link IdlTokenManager} to exchange documentation + * comments without forcing explicit parsing of documentation comments. + * + * The reason this works is that all calls to this class happen within a call to + * the method {@link Idl#CompilationUnit()} (either directly or indirectly). + */ +public class DocCommentHelper { + /** + * Pattern to match the common whitespace indents in a multi-line String. + * Doesn't match a single-line String, fully matches any multi-line String. + * + * To use: match on a {@link String#trim() trimmed} String, and then replace all + * newlines followed by the group "indent" with a newline. + */ + private static final Pattern WS_INDENT = Pattern.compile("(?U).*\\R(?\\h*).*(?:\\R\\k.*)*"); + /** + * Pattern to match the whitespace indents plus common stars (1 or 2) in a + * multi-line String. If a String fully matches, replace all occurrences of a + * newline followed by whitespace and then the group "stars" with a newline. + * + * Note: partial matches are invalid. + */ + private static final Pattern STAR_INDENT = Pattern.compile("(?U)(?\\*{1,2}).*(?:\\R\\h*\\k.*)*"); + + private static final ThreadLocal DOC = new ThreadLocal<>(); + private static final ThreadLocal> WARNINGS = ThreadLocal.withInitial(ArrayList::new); + + /** + * Return all warnings that were encountered while parsing, once. Subsequent + * calls before parsing again will return an empty list. + */ + static List getAndClearWarnings() { + List warnings = WARNINGS.get(); + WARNINGS.remove(); + return warnings; + } + + static void setDoc(Token token) { + DocComment newDocComment = new DocComment(token); + DocComment oldDocComment = DOC.get(); + if (oldDocComment != null) { + WARNINGS.get() + .add(String.format( + "Found documentation comment at line %d, column %d. Ignoring previous one at line %d, column %d: \"%s\"\n" + + "Did you mean to use a multiline comment ( /* ... */ ) instead?", + newDocComment.line, newDocComment.column, oldDocComment.line, oldDocComment.column, oldDocComment.text)); + } + DOC.set(newDocComment); + } + + /** + * Clear any documentation (and generate a warning if there was). + * + * This method should NOT be used after an optional component in a grammar + * (i.e., after a @code{[â€Ļ]} or @code{â€Ļ*} construct), because the optional + * grammar part may have already caused parsing a doc comment special token + * placed after the code block. + */ + static void clearDoc() { + DocComment oldDocComment = DOC.get(); + if (oldDocComment != null) { + WARNINGS.get() + .add(String.format( + "Ignoring out-of-place documentation comment at line %d, column %d: \"%s\"\n" + + "Did you mean to use a multiline comment ( /* ... */ ) instead?", + oldDocComment.line, oldDocComment.column, oldDocComment.text)); + } + DOC.remove(); + } + + static String getDoc() { + DocComment docComment = DOC.get(); + DOC.remove(); + return docComment == null ? null : docComment.text; + } + + /* Package private to facilitate testing */ + static String stripIndents(String doc) { + Matcher starMatcher = STAR_INDENT.matcher(doc); + if (starMatcher.matches()) { + return doc.replaceAll("(?U)(?:^|(\\R)\\h*)\\Q" + starMatcher.group("stars") + "\\E\\h?", "$1"); + } + + Matcher whitespaceMatcher = WS_INDENT.matcher(doc); + if (whitespaceMatcher.matches()) { + return doc.replaceAll("(?U)(\\R)" + whitespaceMatcher.group("indent"), "$1"); + } + + return doc; + } + + private static class DocComment { + private final String text; + private final int line; + private final int column; + + DocComment(Token token) { + // The token is everything after the initial '/**', including all + // whitespace and the ending '*/' + int tokenLength = token.image.length(); + this.text = stripIndents(token.image.substring(0, tokenLength - 2).trim()); + this.line = token.beginLine; + // The preceding token was "/**", and the current token includes + // everything since (also all whitespace). Thus, we can safely subtract 3 + // from the token column to get the start of the doc comment. + this.column = token.beginColumn - 3; + } + } +} diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/IsResolvedSchemaVisitor.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/IsResolvedSchemaVisitor.java new file mode 100644 index 00000000000..6006ad5f82f --- /dev/null +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/IsResolvedSchemaVisitor.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.compiler.idl; + +import org.apache.avro.Schema; +import org.apache.avro.compiler.schema.SchemaVisitor; +import org.apache.avro.compiler.schema.SchemaVisitorAction; + +/** + * This visitor checks if the current schema is fully resolved. + */ +public final class IsResolvedSchemaVisitor implements SchemaVisitor { + boolean hasUnresolvedParts; + + IsResolvedSchemaVisitor() { + hasUnresolvedParts = false; + } + + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + hasUnresolvedParts = SchemaResolver.isUnresolvedSchema(terminal); + return hasUnresolvedParts ? SchemaVisitorAction.TERMINATE : SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(Schema nonTerminal) { + hasUnresolvedParts = SchemaResolver.isUnresolvedSchema(nonTerminal); + if (hasUnresolvedParts) { + return SchemaVisitorAction.TERMINATE; + } + if (nonTerminal.getType() == Schema.Type.RECORD && !nonTerminal.hasFields()) { + // We're still initializing the type... + return SchemaVisitorAction.SKIP_SUBTREE; + } + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal) { + return SchemaVisitorAction.CONTINUE; + } + + @Override + public Boolean get() { + return !hasUnresolvedParts; + } +} diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/ResolvingVisitor.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/ResolvingVisitor.java index c00252ea7ca..1c7175461cc 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/ResolvingVisitor.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/ResolvingVisitor.java @@ -139,10 +139,7 @@ public SchemaVisitorAction afterVisitNonTerminal(final Schema nt) { List fields = nt.getFields(); List newFields = new ArrayList<>(fields.size()); for (Schema.Field field : fields) { - Schema.Field newField = new Schema.Field(field.name(), replace.get(field.schema()), field.doc(), - field.defaultVal(), field.order()); - copyAllProperties(field, newField); - newFields.add(newField); + newFields.add(new Field(field, replace.get(field.schema()))); } newSchema.setFields(newFields); } diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java index 2da4944640d..e3e1a2ddb76 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java @@ -17,18 +17,19 @@ */ package org.apache.avro.compiler.idl; +import org.apache.avro.Protocol; +import org.apache.avro.Schema; +import org.apache.avro.compiler.schema.Schemas; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.IdentityHashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; -import org.apache.avro.Protocol; -import org.apache.avro.Schema; -import org.apache.avro.compiler.schema.Schemas; - /** * Utility class to resolve schemas that are unavailable at the time they are * referenced in the IDL. @@ -44,6 +45,8 @@ private SchemaResolver() { private static final String UR_SCHEMA_NS = "org.apache.avro.compiler"; + private static final AtomicInteger COUNTER = new AtomicInteger(); + /** * Create a schema to represent a "unresolved" schema. (used to represent a * schema where the definition is not known at the time) This concept might be @@ -53,8 +56,8 @@ private SchemaResolver() { * @return */ static Schema unresolvedSchema(final String name) { - Schema schema = Schema.createRecord(UR_SCHEMA_NAME, "unresolved schema", UR_SCHEMA_NS, false, - Collections.EMPTY_LIST); + Schema schema = Schema.createRecord(UR_SCHEMA_NAME + '_' + COUNTER.getAndIncrement(), "unresolved schema", + UR_SCHEMA_NS, false, Collections.EMPTY_LIST); schema.addProp(UR_SCHEMA_ATTR, name); return schema; } @@ -66,8 +69,8 @@ static Schema unresolvedSchema(final String name) { * @return */ static boolean isUnresolvedSchema(final Schema schema) { - return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null - && UR_SCHEMA_NAME.equals(schema.getName()) && UR_SCHEMA_NS.equals(schema.getNamespace())); + return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null && schema.getName() != null + && schema.getName().startsWith(UR_SCHEMA_NAME) && UR_SCHEMA_NS.equals(schema.getNamespace())); } /** @@ -84,14 +87,28 @@ static String getUnresolvedSchemaName(final Schema schema) { } /** - * Will clone the provided protocol while resolving all unreferenced schemas + * Is this a unresolved schema. * - * @param protocol + * @param schema * @return */ + static boolean isFullyResolvedSchema(final Schema schema) { + if (isUnresolvedSchema(schema)) { + return false; + } else { + return Schemas.visit(schema, new IsResolvedSchemaVisitor()); + } + } + + /** + * Will clone the provided protocol while resolving all unreferenced schemas + * + * @param protocol a protocol with possibly unresolved schema references + * @return a protocol without unresolved schema references + */ static Protocol resolve(final Protocol protocol) { Protocol result = new Protocol(protocol.getName(), protocol.getDoc(), protocol.getNamespace()); - final Collection types = protocol.getTypes(); + final Collection types = protocol.getUnresolvedTypes(); // replace unresolved schemas. List newSchemas = new ArrayList<>(types.size()); IdentityHashMap replacements = new IdentityHashMap<>(); diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java index 91232f0f5ac..0c0e5ab6725 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java @@ -21,8 +21,6 @@ import java.util.Collections; import java.util.Deque; import java.util.IdentityHashMap; -import java.util.Iterator; -import java.util.Map; import java.util.Set; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -31,7 +29,7 @@ import org.apache.avro.LogicalType; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; -import org.apache.avro.compiler.specific.SpecificCompiler; +import org.apache.avro.specific.SpecificData; /** * Avro Schema utilities, to traverse... @@ -68,10 +66,7 @@ public static void copyLogicalTypes(final Schema from, final Schema to) { } public static void copyProperties(final JsonProperties from, final JsonProperties to) { - Map objectProps = from.getObjectProps(); - for (Map.Entry entry : objectProps.entrySet()) { - to.addProp(entry.getKey(), entry.getValue()); - } + from.forEachProperty(to::addProp); } public static boolean hasGeneratedJavaClass(final Schema schema) { @@ -89,9 +84,9 @@ public static boolean hasGeneratedJavaClass(final Schema schema) { public static String getJavaClassName(final Schema schema) { String namespace = schema.getNamespace(); if (namespace == null) { - return SpecificCompiler.mangle(schema.getName()); + return SpecificData.mangle(schema.getName()); } else { - return namespace + '.' + SpecificCompiler.mangle(schema.getName()); + return namespace + '.' + SpecificData.mangle(schema.getName()); } } @@ -141,9 +136,8 @@ public static T visit(final Schema start, final SchemaVisitor visitor) { visited.put(schema, schema); break; case RECORD: - Iterator reverseSchemas = schema.getFields().stream().map(Field::schema) - .collect(Collectors.toCollection(ArrayDeque::new)).descendingIterator(); - terminate = visitNonTerminal(visitor, schema, dq, () -> reverseSchemas); + terminate = visitNonTerminal(visitor, schema, dq, () -> schema.getFields().stream().map(Field::schema) + .collect(Collectors.toCollection(ArrayDeque::new)).descendingIterator()); visited.put(schema, schema); break; case UNION: diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SchemaTask.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SchemaTask.java index 49280e50a95..7f38756e979 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SchemaTask.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SchemaTask.java @@ -21,16 +21,19 @@ import java.io.IOException; import org.apache.avro.Schema; +import org.apache.avro.SchemaParser; -/** Ant task to generate Java interface and classes for a protocol. */ +/** Ant task to generate Java interface and classes for a schema. */ public class SchemaTask extends ProtocolTask { @Override protected void doCompile(File src, File dest) throws IOException { - final Schema.Parser parser = new Schema.Parser(); - final Schema schema = parser.parse(src); - final SpecificCompiler compiler = new SpecificCompiler(schema); - compiler.setStringType(getStringType()); - compiler.compileToDestination(src, dest); + final SchemaParser parser = new SchemaParser(); + SchemaParser.ParseResult parseResult = parser.parse(src); + for (Schema schema : parseResult.parsedNamedSchemas()) { + final SpecificCompiler compiler = new SpecificCompiler(schema); + compiler.setStringType(getStringType()); + compiler.compileToDestination(src, dest); + } } public static void main(String[] args) throws IOException { diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java index f9eb43a1386..ea1e1a11b55 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java @@ -17,26 +17,7 @@ */ package org.apache.avro.compiler.specific; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.StringWriter; -import java.io.Writer; -import java.lang.reflect.InvocationTargetException; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; +import static java.nio.charset.StandardCharsets.UTF_8; import org.apache.avro.Conversion; import org.apache.avro.Conversions; @@ -48,19 +29,39 @@ import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.SchemaNormalization; +import org.apache.avro.SchemaParser; import org.apache.avro.data.TimeConversions; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.StringType; import org.apache.avro.specific.SpecificData; -import org.apache.commons.lang3.StringUtils; import org.apache.velocity.Template; import org.apache.velocity.VelocityContext; import org.apache.velocity.app.VelocityEngine; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.avro.specific.SpecificData.RESERVED_WORDS; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.lang.reflect.InvocationTargetException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Generate specific Java interfaces and classes for protocols and schemas. @@ -105,8 +106,11 @@ void addLogicalTypeConversions(SpecificData specificData) { specificData.addLogicalTypeConversion(new TimeConversions.TimeMicrosConversion()); specificData.addLogicalTypeConversion(new TimeConversions.TimestampMillisConversion()); specificData.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion()); + specificData.addLogicalTypeConversion(new TimeConversions.TimestampNanosConversion()); specificData.addLogicalTypeConversion(new TimeConversions.LocalTimestampMicrosConversion()); specificData.addLogicalTypeConversion(new TimeConversions.LocalTimestampMillisConversion()); + specificData.addLogicalTypeConversion(new TimeConversions.LocalTimestampNanosConversion()); + specificData.addLogicalTypeConversion(new Conversions.UUIDConversion()); } private final SpecificData specificData = new SpecificData(); @@ -120,12 +124,20 @@ void addLogicalTypeConversions(SpecificData specificData) { private boolean gettersReturnOptional = false; private boolean optionalGettersForNullableFieldsOnly = false; private boolean createSetters = true; + private boolean createNullSafeAnnotations = false; private boolean createAllArgsConstructor = true; private String outputCharacterEncoding; private boolean enableDecimalLogicalType = false; private String suffix = ".java"; private List additionalVelocityTools = Collections.emptyList(); + private String nullSafeAnnotationNullable = "org.jetbrains.annotations.Nullable"; + private String nullSafeAnnotationNotNull = "org.jetbrains.annotations.NotNull"; + + private String recordSpecificClass = "org.apache.avro.specific.SpecificRecordBase"; + + private String errorSpecificClass = "org.apache.avro.specific.SpecificExceptionBase"; + /* * Used in the record.vm template. */ @@ -133,25 +145,7 @@ public boolean isCreateAllArgsConstructor() { return createAllArgsConstructor; } - /* Reserved words for accessor/mutator methods */ - private static final Set ACCESSOR_MUTATOR_RESERVED_WORDS = new HashSet<>( - Arrays.asList("class", "schema", "classSchema")); - - static { - // Add reserved words to accessor/mutator reserved words - ACCESSOR_MUTATOR_RESERVED_WORDS.addAll(RESERVED_WORDS); - } - - /* Reserved words for error types */ - private static final Set ERROR_RESERVED_WORDS = new HashSet<>(Arrays.asList("message", "cause")); - - static { - // Add accessor/mutator reserved words to error reserved words - ERROR_RESERVED_WORDS.addAll(ACCESSOR_MUTATOR_RESERVED_WORDS); - } - - private static final String FILE_HEADER = "/**\n" + " * Autogenerated by Avro\n" + " *\n" - + " * DO NOT EDIT DIRECTLY\n" + " */\n"; + private static final String FILE_HEADER = "/*\n * Autogenerated by Avro\n *\n * DO NOT EDIT DIRECTLY\n */\n"; public SpecificCompiler(Protocol protocol) { this(); @@ -163,8 +157,20 @@ public SpecificCompiler(Protocol protocol) { } public SpecificCompiler(Schema schema) { + this(Collections.singleton(schema)); + } + + public SpecificCompiler(Collection schemas) { + this(); + for (Schema schema : schemas) { + enqueue(schema); + } + this.protocol = null; + } + + public SpecificCompiler(Iterable schemas) { this(); - enqueue(schema); + schemas.forEach(this::enqueue); this.protocol = null; } @@ -235,6 +241,47 @@ public void setCreateSetters(boolean createSetters) { this.createSetters = createSetters; } + public boolean isCreateNullSafeAnnotations() { + return this.createNullSafeAnnotations; + } + + /** + * Set to true to add @Nullable and @NotNull annotations. By default, JetBrains + * annotations are used (org.jetbrains.annotations.Nullable and + * org.jetbrains.annotations.NotNull) but this can be overridden using + * {@link #setNullSafeAnnotationNullable)} and + * {@link #setNullSafeAnnotationNotNull)}. + */ + public void setCreateNullSafeAnnotations(boolean createNullSafeAnnotations) { + this.createNullSafeAnnotations = createNullSafeAnnotations; + } + + public String getNullSafeAnnotationNullable() { + return this.nullSafeAnnotationNullable; + } + + /** + * Sets the annotation to use for nullable fields. Default is + * "org.jetbrains.annotations.Nullable". The annotation must include the full + * package path. + */ + public void setNullSafeAnnotationNullable(String nullSafeAnnotationNullable) { + this.nullSafeAnnotationNullable = nullSafeAnnotationNullable; + } + + public String getNullSafeAnnotationNotNull() { + return this.nullSafeAnnotationNotNull; + } + + /** + * Sets the annotation to use for non-nullable fields. Default is + * "org.jetbrains.annotations.NotNull". The annotation must include the full + * package path. + */ + public void setNullSafeAnnotationNotNull(String nullSafeAnnotationNotNull) { + this.nullSafeAnnotationNotNull = nullSafeAnnotationNotNull; + } + public boolean isCreateOptionalGetters() { return this.createOptionalGetters; } @@ -371,7 +418,7 @@ private void initializeVelocity() { "org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader"); velocityEngine.addProperty("resource.loader.file.class", "org.apache.velocity.runtime.resource.loader.FileResourceLoader"); - velocityEngine.addProperty("resource.loader.file.path", "/, ."); + velocityEngine.addProperty("resource.loader.file.path", "/, ., "); velocityEngine.setProperty("runtime.strict_mode.enable", true); // Set whitespace gobbling to Backward Compatible (BC) @@ -457,12 +504,16 @@ public static void compileSchema(File src, File dest) throws IOException { * Generates Java classes for a number of schema files. */ public static void compileSchema(File[] srcFiles, File dest) throws IOException { - Schema.Parser parser = new Schema.Parser(); + SchemaParser parser = new SchemaParser(); for (File src : srcFiles) { - Schema schema = parser.parse(src); + parser.parse(src); + } + // FIXME: use lastModified() without causing a NoSuchMethodError in the build + File lastModifiedSourceFile = Stream.of(srcFiles).max(Comparator.comparing(File::lastModified)).orElse(null); + for (Schema schema : parser.getParsedNamedSchemas()) { SpecificCompiler compiler = new SpecificCompiler(schema); - compiler.compileToDestination(src, dest); + compiler.compileToDestination(lastModifiedSourceFile, dest); } } @@ -557,7 +608,7 @@ OutputFile compileInterface(Protocol protocol) { String out = renderTemplate(templateDir + "protocol.vm", context); OutputFile outputFile = new OutputFile(); - String mangledName = mangle(protocol.getName()); + String mangledName = mangleTypeIdentifier(protocol.getName()); outputFile.path = makePath(mangledName, mangle(protocol.getNamespace())); outputFile.contents = out; outputFile.outputCharacterEncoding = outputCharacterEncoding; @@ -629,7 +680,7 @@ OutputFile compile(Schema schema) { } OutputFile outputFile = new OutputFile(); - String name = mangle(schema.getName()); + String name = mangleTypeIdentifier(schema.getName()); outputFile.path = makePath(name, mangle(schema.getNamespace())); outputFile.contents = output; outputFile.outputCharacterEncoding = outputCharacterEncoding; @@ -653,9 +704,7 @@ private Protocol addStringType(Protocol p) { Protocol newP = new Protocol(p.getName(), p.getDoc(), p.getNamespace()); Map types = new LinkedHashMap<>(); - for (Map.Entry a : p.getObjectProps().entrySet()) { - newP.addProp(a.getKey(), a.getValue()); - } + p.forEachProperty(newP::addProp); // annotate types Collection namedTypes = new LinkedHashSet<>(); @@ -795,7 +844,7 @@ private String javaType(Schema schema, boolean checkConvertedLogicalType) { case RECORD: case ENUM: case FIXED: - return mangle(schema.getFullName()); + return SpecificData.mangleFullyQualified(schema.getFullName()); case ARRAY: return "java.util.List<" + javaType(schema.getElementType()) + ">"; case MAP: @@ -856,7 +905,7 @@ public String generateSetterCode(Schema schema, String name, String pname) { /** * Utility for template use. Returns the unboxed java type for a Schema. * - * @deprecated use javaUnbox(Schema, boolean), kept for backward compatibiliby + * @deprecated use javaUnbox(Schema, boolean), kept for backward compatibility * of custom templates */ @Deprecated @@ -920,19 +969,21 @@ public int getNonNullIndex(Schema s) { * record.vm can handle the schema being presented. */ public boolean isCustomCodable(Schema schema) { - if (schema.isError()) - return false; return isCustomCodable(schema, new HashSet<>()); } private boolean isCustomCodable(Schema schema, Set seen) { if (!seen.add(schema)) + // Recursive call: assume custom codable until a caller on the call stack proves + // otherwise. return true; if (schema.getLogicalType() != null) return false; boolean result = true; switch (schema.getType()) { case RECORD: + if (schema.isError()) + return false; for (Schema.Field f : schema.getFields()) result &= isCustomCodable(f.schema(), seen); break; @@ -986,27 +1037,43 @@ public String conversionInstance(Schema schema) { */ public String[] javaAnnotations(JsonProperties props) { final Object value = props.getObjectProp("javaAnnotation"); - if (value == null) - return new String[0]; - if (value instanceof String) + if (value instanceof String && isValidAsAnnotation((String) value)) return new String[] { value.toString() }; if (value instanceof List) { final List list = (List) value; final List annots = new ArrayList<>(list.size()); for (Object o : list) { - annots.add(o.toString()); + if (isValidAsAnnotation(o.toString())) + annots.add(o.toString()); } return annots.toArray(new String[0]); } return new String[0]; } + private static final String PATTERN_IDENTIFIER_PART = "\\p{javaJavaIdentifierStart}\\p{javaJavaIdentifierPart}*"; + private static final String PATTERN_IDENTIFIER = String.format("(?:%s(?:\\.%s)*)", PATTERN_IDENTIFIER_PART, + PATTERN_IDENTIFIER_PART); + private static final String PATTERN_STRING = "\"(?:\\\\[\\\\\"ntfb]|(?", ">"); } /** @@ -1049,47 +1126,84 @@ public static String nullToEmpty(String x) { * Utility for template use. Adds a dollar sign to reserved words. */ public static String mangle(String word) { - return mangle(word, false); + return SpecificData.mangle(word, false); } /** * Utility for template use. Adds a dollar sign to reserved words. */ public static String mangle(String word, boolean isError) { - return mangle(word, isError ? ERROR_RESERVED_WORDS : RESERVED_WORDS); + return SpecificData.mangle(word, isError); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words in type + * identifiers. + */ + public static String mangleTypeIdentifier(String word) { + return SpecificData.mangleTypeIdentifier(word, false); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words in type + * identifiers. + */ + public static String mangleTypeIdentifier(String word, boolean isError) { + return SpecificData.mangle(word, isError); } /** * Utility for template use. Adds a dollar sign to reserved words. */ public static String mangle(String word, Set reservedWords) { - return mangle(word, reservedWords, false); + return SpecificData.mangle(word, reservedWords, false); } /** * Utility for template use. Adds a dollar sign to reserved words. */ public static String mangle(String word, Set reservedWords, boolean isMethod) { - if (StringUtils.isBlank(word)) { - return word; - } - if (word.contains(".")) { - // If the 'word' is really a full path of a class we must mangle just the - String[] packageWords = word.split("\\."); - String[] newPackageWords = new String[packageWords.length]; - - for (int i = 0; i < packageWords.length; i++) { - String oldName = packageWords[i]; - newPackageWords[i] = mangle(oldName, reservedWords, false); - } + return SpecificData.mangle(word, reservedWords, isMethod); + } - return String.join(".", newPackageWords); - } - if (reservedWords.contains(word) || (isMethod && reservedWords - .contains(Character.toLowerCase(word.charAt(0)) + ((word.length() > 1) ? word.substring(1) : "")))) { - return word + "$"; + public boolean canGenerateEqualsAndHashCode(Schema schema) { + return getUsedCustomLogicalTypeFactories(schema).isEmpty(); + } + + public boolean isPrimitiveType(Schema schema) { + return !isUnboxedJavaTypeNullable(schema) && getConvertedLogicalType(schema) == null; + } + + public String hashCodeFor(Schema schema, String name) { + switch (javaUnbox(schema, false)) { + case "int": + return "Integer.hashCode(" + name + ")"; + case "long": + return "Long.hashCode(" + name + ")"; + case "float": + return "Float.hashCode(" + name + ")"; + case "double": + return "Double.hashCode(" + name + ")"; + case "boolean": + return "Boolean.hashCode(" + name + ")"; + default: + // Hashcode of Union is expected to match ordinal + if (schema.getType() == Schema.Type.ENUM || ((schema.getType() == Schema.Type.UNION) + && (schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.ENUM)))) { + if (schema.getType() == Schema.Type.ENUM + || (schema.getTypes().size() == 2 && schema.getTypes().contains(NULL_SCHEMA))) { + return "(" + name + " == null ? 0 : ((java.lang.Enum) " + name + ").ordinal())"; + } else { + return "(" + name + " == null ? 0 : " + name + " instanceof java.lang.Enum ? ((java.lang.Enum) " + name + + ").ordinal() : " + name + ".hashCode())"; + } + } + return "(" + name + " == null ? 0 : " + name + ".hashCode())"; } - return word; + } + + public boolean ignoredField(Field field) { + return field.order() == Field.Order.IGNORE; } /** @@ -1220,14 +1334,10 @@ private static String generateMethodName(Schema schema, Field field, String pref // Check for the special case in which the schema defines two fields whose // names are identical except for the case of the first character: - char firstChar = field.name().charAt(0); - String conflictingFieldName = (Character.isLowerCase(firstChar) ? Character.toUpperCase(firstChar) - : Character.toLowerCase(firstChar)) + (field.name().length() > 1 ? field.name().substring(1) : ""); - boolean fieldNameConflict = schema.getField(conflictingFieldName) != null; + int indexNameConflict = calcNameIndex(field.name(), schema); StringBuilder methodBuilder = new StringBuilder(prefix); - String fieldName = mangle(field.name(), schema.isError() ? ERROR_RESERVED_WORDS : ACCESSOR_MUTATOR_RESERVED_WORDS, - true); + String fieldName = SpecificData.mangleMethod(field.name(), schema.isError()); boolean nextCharToUpper = true; for (int ii = 0; ii < fieldName.length(); ii++) { @@ -1243,16 +1353,75 @@ private static String generateMethodName(Schema schema, Field field, String pref methodBuilder.append(postfix); // If there is a field name conflict append $0 or $1 - if (fieldNameConflict) { + if (indexNameConflict >= 0) { if (methodBuilder.charAt(methodBuilder.length() - 1) != '$') { methodBuilder.append('$'); } - methodBuilder.append(Character.isLowerCase(firstChar) ? '0' : '1'); + methodBuilder.append(indexNameConflict); } return methodBuilder.toString(); } + /** + * Calc name index for getter / setter field in case of conflict as example, + * having a schema with fields __X, _X, _x, X, x should result with indexes __X: + * 3, _X: 2, _x: 1, X: 0 x: None (-1) + * + * @param fieldName : field name. + * @param schema : schema. + * @return index for field. + */ + private static int calcNameIndex(String fieldName, Schema schema) { + // get name without underscore at start + // and calc number of other similar fields with same subname. + int countSimilar = 0; + String pureFieldName = fieldName; + while (!pureFieldName.isEmpty() && pureFieldName.charAt(0) == '_') { + pureFieldName = pureFieldName.substring(1); + if (schema.getField(pureFieldName) != null) { + countSimilar++; + } + String reversed = reverseFirstLetter(pureFieldName); + if (schema.getField(reversed) != null) { + countSimilar++; + } + } + // field name start with upper have +1 + String reversed = reverseFirstLetter(fieldName); + if (!pureFieldName.isEmpty() && Character.isUpperCase(pureFieldName.charAt(0)) + && schema.getField(reversed) != null) { + countSimilar++; + } + + int ret = -1; // if no similar name, no index. + if (countSimilar > 0) { + ret = countSimilar - 1; // index is count similar -1 (start with $0) + } + + return ret; + } + + /** + * Reverse first letter upper <=> lower. __Name <=> __name + * + * @param name : input name. + * @return name with change case of first letter. + */ + private static String reverseFirstLetter(String name) { + StringBuilder builder = new StringBuilder(name); + int index = 0; + while (builder.length() > index && builder.charAt(index) == '_') { + index++; + } + if (builder.length() > index) { + char c = builder.charAt(index); + char inverseC = Character.isLowerCase(c) ? Character.toUpperCase(c) : Character.toLowerCase(c); + builder.setCharAt(index, inverseC); + } + return builder.toString(); + } + /** * Tests whether an unboxed Java type can be set to null */ @@ -1284,4 +1453,20 @@ public static void main(String[] args) throws Exception { public void setOutputCharacterEncoding(String outputCharacterEncoding) { this.outputCharacterEncoding = outputCharacterEncoding; } + + public String getSchemaParentClass(boolean isError) { + if (isError) { + return this.errorSpecificClass; + } else { + return this.recordSpecificClass; + } + } + + public void setRecordSpecificClass(final String recordSpecificClass) { + this.recordSpecificClass = recordSpecificClass; + } + + public void setErrorSpecificClass(final String errorSpecificClass) { + this.errorSpecificClass = errorSpecificClass; + } } diff --git a/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj b/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj index bfe08a46b24..2d312794c3e 100644 --- a/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj +++ b/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj @@ -63,6 +63,7 @@ package org.apache.avro.compiler.idl; import java.io.*; import java.net.*; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; @@ -70,6 +71,7 @@ import java.util.Map; import java.net.URL; import org.apache.avro.Schema; +import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema.*; import org.apache.avro.Protocol; @@ -79,28 +81,32 @@ import org.apache.avro.util.internal.Accessor; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.*; -import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; /** * Grammar to parse a higher-level language into an Avro Schema. * * Note: each instance is not thread-safe, but multiple separate * instances are safely independent. + * + * @deprecated Use the new org.apache.avro.idl.IdlReader from avro-idl instead. */ +@Deprecated public class Idl implements Closeable { static JsonNodeFactory FACTORY = JsonNodeFactory.instance; + private static final String OPTIONAL_NULLABLE_TYPE_PROPERTY = "org.apache.avro.compiler.idl.Idl.NullableType.optional"; URI inputDir; ClassLoader resourceLoader = null; String namespace; - Map names = new LinkedHashMap(); - - private static final ThreadLocal DOC = new ThreadLocal(); - static void setDoc(String doc) { DOC.set(doc.trim()); } - static String getDoc() { - String doc = DOC.get(); - DOC.set(null); - return doc; + Map names = new LinkedHashMap<>(); + + private List parserWarnings = Collections.emptyList(); + /** + * Return all warnings that were encountered while parsing. + */ + public List getWarningsAfterParsing() { + return parserWarnings; } public Idl(File inputFile) throws IOException { @@ -134,6 +140,7 @@ public class Idl implements Closeable { this.resourceLoader = parent.resourceLoader; } + @SuppressWarnings("RedundantThrows") public void close() throws IOException { jj_input_stream.inputStream.close(); } @@ -156,7 +163,7 @@ public class Idl implements Closeable { JsonNode value = props.get(key); if (!value.isArray()) throw error(key+" property must be array: "+value, token); - List values = new ArrayList(); + List values = new ArrayList<>(); for (JsonNode n : value) if (n.isTextual()) values.add(n.textValue()); @@ -170,16 +177,42 @@ public class Idl implements Closeable { File file = "file".equals(uri.getScheme()) ? new File(uri.getPath()) : null; URL result = null; if (file != null && file.exists()) - result = file.toURI().toURL(); - else if("classpath".equals(uri.getScheme())) - result = this.resourceLoader.getResource(uri.getPath().substring(1)); + result = new URL("file:" + file.getPath()); else if (this.resourceLoader != null) - result = this.resourceLoader.getResource(importFile); + if ("classpath".equals(uri.getScheme())) + result = this.resourceLoader.getResource(uri.getPath().substring(1)); + else + result = this.resourceLoader.getResource(importFile); if (result == null) throw new FileNotFoundException(importFile); return result; } + /** + * For "optional schemas" (recognized by the marker property the NullableType + * production adds), ensure the null schema is in the right place. + * + * @param schema a schema + * @param defaultValue the intended default value + * @return the schema, or an optional schema with null in the right place + */ + private static Schema fixOptionalSchema(Schema schema, JsonNode defaultValue) { + final Object optionalType = schema.getObjectProp(OPTIONAL_NULLABLE_TYPE_PROPERTY); + if (optionalType != null) { + // The schema is a union schema with 2 types: "null" and a non-"null" schema + Schema nullSchema = schema.getTypes().get(0); + Schema nonNullSchema = schema.getTypes().get(1); + boolean nonNullDefault = defaultValue != null && !defaultValue.isNull(); + + // Always return a new schema: this drops the marker property. + if (nonNullDefault) { + return Schema.createUnion(nonNullSchema, nullSchema); + } else { + return Schema.createUnion(nullSchema, nonNullSchema); + } + } + return schema; + } } PARSER_END(Idl) @@ -218,13 +251,13 @@ MORE : SPECIAL_TOKEN : { - <"*/" > {Idl.setDoc(image.substring(0, image.length()-2));} : DEFAULT + "*/" {DocCommentHelper.setDoc(matchedToken);} : DEFAULT } SKIP : { - <"*/" > : DEFAULT + "*/" : DEFAULT } /* RESERVED WORDS AND LITERALS */ @@ -259,6 +292,7 @@ TOKEN : | < TIME: "time_ms" > | < TIMESTAMP: "timestamp_ms" > | < DECIMAL: "decimal" > +| < BIG_DECIMAL: "big_decimal" > | < LOCAL_TIMESTAMP: "local_timestamp_ms" > | < UUID: "uuid" > } @@ -997,6 +1031,7 @@ TOKEN : | < EQUALS: "=" > | < DOT: "." > | < DASH: "-" > +| < QUESTION_MARK: "?" > } TOKEN : @@ -1018,19 +1053,23 @@ TOKEN : Protocol CompilationUnit(): { Protocol p; + DocCommentHelper.getAndClearWarnings(); // Throw away previous results. } { p = ProtocolDeclaration() - ( < "\u001a" > )? + ( "\u001a" )? ( )? - { return SchemaResolver.resolve(p); } + { + parserWarnings = DocCommentHelper.getAndClearWarnings(); + return SchemaResolver.resolve(p); + } } /* * Declaration syntax follows. */ -private Schema NamedSchemaDeclaration(Map props): +private Schema NamedSchemaDeclaration(String doc, Map props): { Schema s; String savedSpace = this.namespace; @@ -1041,9 +1080,9 @@ private Schema NamedSchemaDeclaration(Map props): this.namespace = getTextProp("namespace", props, token); } ( - s = FixedDeclaration() - | s = EnumDeclaration() - | s = RecordDeclaration() + s = FixedDeclaration(doc) + | s = EnumDeclaration(doc) + | s = RecordDeclaration(doc) ) { this.namespace = savedSpace; @@ -1053,9 +1092,12 @@ private Schema NamedSchemaDeclaration(Map props): } else if ("aliases".equals(key)) { // aliases for (String alias : getTextProps("aliases", props, token)) s.addAlias(alias); - } else { // add all other props + } else { // add all other properties Accessor.addProp(s, key, props.get(key)); } + LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s); + if (logicalType != null) + logicalType.addToSchema(s); return s; } @@ -1064,11 +1106,10 @@ private Schema NamedSchemaDeclaration(Map props): Schema UnionDefinition(): { Schema s; - List schemata = new ArrayList(); + List schemata = new ArrayList<>(); } { - // TODO should probably disallow other unions here in the parser? - + // Don't disallow unions here: its constructor disallows nested unions and throws a descriptive exception. "union" "{" s = Type() @@ -1088,11 +1129,12 @@ Schema UnionDefinition(): Protocol ProtocolDeclaration(): { - String name; + String doc, name; Protocol p; - Map props = new LinkedHashMap(); + Map props = new LinkedHashMap<>(); } { + doc = Documentation() ( SchemaProperty(props) )* { if (props.containsKey("namespace")) @@ -1101,10 +1143,10 @@ Protocol ProtocolDeclaration(): "protocol" name = Identifier() { - p = new Protocol(name, getDoc(), namespace); + p = new Protocol(name, doc, namespace); for (String key : props.keySet()) if ("namespace".equals(key)) { // already handled: ignore - } else { // add all other props + } else { // add all other properties Accessor.addProp(p, key, props.get(key)); } } @@ -1115,20 +1157,32 @@ Protocol ProtocolDeclaration(): } -Schema EnumDeclaration(): +String Documentation(): +{ + //noinspection ResultOfMethodCallIgnored + getToken(1); // Parse, but don't consume, at least one token; this triggers parsing special tokens like doc comments. +} +{ + // Don't parse anything, just return the doc string + { + return DocCommentHelper.getDoc(); + } +} + + +Schema EnumDeclaration(String doc): { String name; List symbols; String defaultSymbol = null; } { - "enum" { String doc = getDoc(); } + "enum" name = Identifier() symbols = EnumBody() - [ defaultSymbol=Identifier() ] + [ defaultSymbol=Identifier() { DocCommentHelper.clearDoc(); } ] { - Schema s = Schema.createEnum(name, doc, this.namespace, symbols, - defaultSymbol); + Schema s = Schema.createEnum(name, doc, namespace, symbols, defaultSymbol); names.put(s.getFullName(), s); return s; } @@ -1136,13 +1190,14 @@ Schema EnumDeclaration(): List EnumBody(): { - List symbols = new ArrayList(); + List symbols = new ArrayList<>(); } { - "{" - [ EnumConstant(symbols) ( LOOKAHEAD(2) "," EnumConstant(symbols) )* ] + "{" { DocCommentHelper.clearDoc(); } + [ EnumConstant(symbols) ( "," EnumConstant(symbols) )* ] "}" { + DocCommentHelper.clearDoc(); return symbols; } } @@ -1157,13 +1212,14 @@ void EnumConstant(List symbols): void ProtocolBody(Protocol p): { + String doc; Schema schema; Message message; Protocol importProtocol; - Map props = new LinkedHashMap(); + Map props = new LinkedHashMap<>(); } { - "{" + "{" { DocCommentHelper.clearDoc(); } ( ((( importProtocol = ImportIdl() | importProtocol = ImportProtocol()) { @@ -1172,21 +1228,26 @@ void ProtocolBody(Protocol p): p.getMessages().putAll(importProtocol.getMessages()); }) | schema = ImportSchema() - ) + ) { + DocCommentHelper.clearDoc(); + } | + doc = Documentation() ( SchemaProperty(props) )* ( - schema = NamedSchemaDeclaration(props) + schema = NamedSchemaDeclaration(doc, props) | - message = MessageDeclaration(p, props) { + message = MessageDeclaration(p, doc, props) { p.getMessages().put(message.getName(), message); } - ) { props.clear(); } + ) { + props.clear(); + } ) * "}" - { p.setTypes(names.values()); + DocCommentHelper.clearDoc(); } } @@ -1197,13 +1258,8 @@ Protocol ImportIdl() : { { importFile = JsonString() ";" { - try { - Idl idl = new Idl(findFile(importFile), this); - try { - return idl.CompilationUnit(); - } finally { - idl.close(); - } + try (Idl idl=new Idl(findFile(importFile), this)){ + return idl.CompilationUnit(); } catch (IOException e) { throw error("Error importing "+importFile+": "+e, token); } @@ -1216,14 +1272,8 @@ Protocol ImportProtocol() : { { importFile = JsonString() ";" { - - try { - InputStream stream = findFile(importFile).openStream(); - try { - return Protocol.parse(stream); - } finally { - stream.close(); - } + try (InputStream stream=findFile(importFile).openStream()) { + return Protocol.parse(stream); } catch (IOException e) { throw error("Error importing "+importFile+": "+e, token); } @@ -1236,24 +1286,21 @@ Schema ImportSchema() : { { importFile = JsonString() ";" { - try { + try (InputStream stream=findFile(importFile).openStream()){ + // This usage of Schema.Parser should not be changed. + // Remove this whole (old) IDL parser instead. Parser parser = new Schema.Parser(); - parser.addTypes(names); // inherit names - InputStream stream = findFile(importFile).openStream(); - try { - Schema value = parser.parse(stream); - names = parser.getTypes(); // update names - return value; - } finally { - stream.close(); - } + parser.addTypes(names.values()); // inherit names + Schema value = parser.parse(stream); + names = parser.getTypes(); // update names + return value; } catch (IOException e) { throw error("Error importing "+importFile+": "+e, token); } } } -Schema FixedDeclaration(): +Schema FixedDeclaration(String doc): { String name; Token sizeTok; @@ -1262,17 +1309,17 @@ Schema FixedDeclaration(): "fixed" name = Identifier() "(" sizeTok = ")" ";" { - Schema s = Schema.createFixed(name, getDoc(), this.namespace, - Integer.parseInt(sizeTok.image)); + DocCommentHelper.clearDoc(); + Schema s = Schema.createFixed(name, doc, this.namespace, Integer.parseInt(sizeTok.image)); names.put(s.getFullName(), s); return s; } } -Schema RecordDeclaration(): +Schema RecordDeclaration(String doc): { String name; - List fields = new ArrayList(); + List fields = new ArrayList<>(); boolean isError; } { @@ -1282,14 +1329,14 @@ Schema RecordDeclaration(): ) name = Identifier() { - Schema result = Schema.createRecord( - name, getDoc(), this.namespace, isError); + Schema result = Schema.createRecord(name, doc, this.namespace, isError); names.put(result.getFullName(), result); } - "{" + "{" { DocCommentHelper.clearDoc(); } ( FieldDeclaration(fields) )* "}" { + DocCommentHelper.clearDoc(); result.setFields(fields); return result; } @@ -1312,84 +1359,66 @@ private void SchemaProperty(Map properties): void FieldDeclaration(List fields): { + String defaultDoc; Schema type; - Map props = new LinkedHashMap(); } { - // TODO should we be able to specify properties on any Type? - // or just on field declarations as done here - - ( SchemaProperty(props) )* + defaultDoc = Documentation() type = Type() - VariableDeclarator(type, fields) ( "," VariableDeclarator(type, fields) )* - ";" - { - for (String key : props.keySet()) - Accessor.addProp(type, key, props.get(key)); - } + VariableDeclarator(type, defaultDoc, fields) ( "," VariableDeclarator(type, defaultDoc, fields) )* + ";" { DocCommentHelper.clearDoc(); } } -void VariableDeclarator(Schema type, List fields): +void VariableDeclarator(Schema type, String defaultDoc, List fields): { - String name; + String doc, name; JsonNode defaultValue = null; - Map props = new LinkedHashMap(); + Map props = new LinkedHashMap<>(); } { - ( SchemaProperty(props) )* - + doc = Documentation() + ( SchemaProperty(props) )* name = Identifier() - - [ defaultValue=Json() ] - + [ defaultValue=Json() ] { Field.Order order = Field.Order.ASCENDING; for (String key : props.keySet()) if ("order".equals(key)) order = Field.Order.valueOf(getTextProp(key,props,token).toUpperCase()); - boolean validate = !SchemaResolver.isUnresolvedSchema(type); - Field field = Accessor.createField(name, type, getDoc(), defaultValue, validate, order); + boolean validate = SchemaResolver.isFullyResolvedSchema(type); + Schema fieldType = fixOptionalSchema(type, defaultValue); + Field field = Accessor.createField(name, fieldType, doc == null ? defaultDoc : doc, defaultValue, validate, order); for (String key : props.keySet()) if ("order".equals(key)) { // already handled: ignore } else if ("aliases".equals(key)) { // aliases for (String alias : getTextProps("aliases", props, token)) field.addAlias(alias); - } else { // add all other props + } else { // add all other properties Accessor.addProp(field, key, props.get(key)); } fields.add(field); + DocCommentHelper.clearDoc(); } } -String MessageDocumentation(): -{} -{ - // Don't parse anything, just return the doc string - { - return getDoc(); - } -} - -private Message MessageDeclaration(Protocol p, Map props): +private Message MessageDeclaration(Protocol p, String msgDoc, Map props): { - String msgDoc; String name; Schema request; Schema response; boolean oneWay = false; - List errorSchemata = new ArrayList(); + List errorSchemata = new ArrayList<>(); errorSchemata.add(Protocol.SYSTEM_ERROR); } { - msgDoc = MessageDocumentation() - response = ResultType() - name = Identifier() + response = ResultType() name = Identifier() request = FormalParameters() [ "oneway" {oneWay = true; } | "throws" ErrorList(errorSchemata) ] ";" { + DocCommentHelper.clearDoc(); Schema errors = Schema.createUnion(errorSchemata); if (oneWay && response.getType() != Type.NULL) throw error("One-way message'"+name+"' must return void", token); @@ -1411,44 +1440,89 @@ void ErrorList(List errors): Schema FormalParameters(): { - List fields = new ArrayList(); + List fields = new ArrayList<>(); } { - ( - "(" [ FormalParameter(fields) ( "," FormalParameter(fields) )* ] ")" - ) + "(" { DocCommentHelper.clearDoc(); } + [ FormalParameter(fields) ( "," FormalParameter(fields) )* ] ")" { - return Schema.createRecord(fields); + DocCommentHelper.clearDoc(); + return Schema.createRecord(null, null, null, false, fields); } } void FormalParameter(List fields): { + String doc; Schema type; } { + doc = Documentation() type = Type() - VariableDeclarator(type, fields) + VariableDeclarator(type, doc, fields) } Schema Type(): { Schema s; - Map props = new LinkedHashMap(); + Map props = new LinkedHashMap<>(); } { - ( SchemaProperty(props) )* + s = UnannotatedType(props) + { + return s; + } +} + +Schema UnannotatedType(Map props): +{ + Schema s; +} +{ ( - LOOKAHEAD(2) s = ReferenceType() - | s = PrimitiveType() - | s = UnionDefinition() - | s = ArrayType() - | s = MapType() + s = NullableType(props) + | ( + s = UnionDefinition() + | s = ArrayType() + | s = MapType() + ) + { + // NullableType also applies properties, inside any union with null it may create. + for (String key : props.keySet()) + Accessor.addProp(s, key, props.get(key)); + LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s); + if (logicalType != null) + logicalType.addToSchema(s); + } ) { + return s; + } +} + +Schema NullableType(Map props): +{ + Schema s; + boolean optional = false; +} +{ + ( + s = ReferenceType() { if (!props.isEmpty()) { throw error("Type references may not be annotated", token); } } + | s = PrimitiveType() + ) [ { optional = true; } ] + { + // By applying the properties here (before creating the union), type annotations modify the optional type instead of the union. for (String key : props.keySet()) Accessor.addProp(s, key, props.get(key)); + LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s); + if (logicalType != null) + logicalType.addToSchema(s); + if (optional) { + s = Schema.createUnion(Schema.create(Schema.Type.NULL), s); + // Add a marker property to the union (it will be removed when creating fields) + Accessor.addProp(s, OPTIONAL_NULLABLE_TYPE_PROPERTY, BooleanNode.TRUE); + } return s; } } @@ -1485,10 +1559,8 @@ Schema ReferenceType(): StringBuilder sb = new StringBuilder(); } { - ( - part = Identifier() { sb.append(part); } - ("." tok = AnyIdentifier() { sb.append(".").append(tok.image); })* - ) + part = Identifier() { sb.append(part); } + ("." tok = AnyIdentifier() { sb.append(".").append(tok.image); })* { String name = sb.toString(); if ((name.indexOf('.') == -1) && namespace != null) @@ -1503,7 +1575,7 @@ Schema ReferenceType(): } Schema PrimitiveType(): -{} +{ Schema s; } { "boolean" { return Schema.create(Type.BOOLEAN); } | "bytes" { return Schema.create(Type.BYTES); } @@ -1517,7 +1589,8 @@ Schema PrimitiveType(): | "time_ms" { return LogicalTypes.timeMillis().addToSchema(Schema.create(Type.INT)); } | "timestamp_ms" { return LogicalTypes.timestampMillis().addToSchema(Schema.create(Type.LONG)); } | "local_timestamp_ms" { return LogicalTypes.localTimestampMillis().addToSchema(Schema.create(Type.LONG)); } -| "decimal" { return DecimalTypeProperties(); } +| "decimal" s = DecimalTypeProperties() { return s; } +| "big_decimal" { return LogicalTypes.bigDecimal().addToSchema(Schema.create(Type.BYTES)); } | "uuid" {return LogicalTypes.uuid().addToSchema(Schema.create(Type.STRING));} } @@ -1542,22 +1615,22 @@ Schema ResultType(): Schema schema; } { - LOOKAHEAD(2) "void" { return Schema.create(Type.NULL); } - | schema = Type() { return schema; } + | schema = UnannotatedType(Collections.emptyMap()) { return schema; } } String PropertyName(): { Token t; + String s; StringBuilder name = new StringBuilder(); } { - t = { name.append(t.image); } + s = Identifier() { name.append(s); } ( t = { name.append(t.image); } - t = { name.append(t.image); } | + s = Identifier() { name.append(s); } | t = { name.append(t.image); } - t = { name.append(t.image); } + s = Identifier() { name.append(s); } ) * { return name.toString(); } } @@ -1586,23 +1659,29 @@ Token AnyIdentifier(): t = | t = | t = | + t = | + t = | t = | t = | t = | + t = | t = | t = | + t = | t = | t = | t = | t = | t = | t = | + t = | t = | t = | t =